First article supporting material

Crude and age-sex-year adjusted mortality rates and ratios and net survival

Author

Andrés González Santa Cruz

Published

August 21, 2025


Data Loading and Exploration

Loading Packages and uniting databases

Proceed to load the necessary packages.

Code
# --- Bootstrap reticulate con ruta relativa a getwd() ---
suppressPackageStartupMessages(library(reticulate))

# Busca .mamba_root/envs/py311/python.exe desde getwd() hacia padres
find_python_rel <- function(start = getwd(),
                            rel = file.path(".mamba_root","envs","py311","python.exe")) {
  cur <- normalizePath(start, winslash = "/", mustWork = FALSE)
  repeat {
    cand <- normalizePath(file.path(cur, rel), winslash = "/", mustWork = FALSE)
    if (file.exists(cand)) return(cand)
    parent <- dirname(cur)
    if (identical(parent, cur)) return(NA_character_)  # llegó a la raíz
    cur <- parent
  }
}

py <- find_python_rel()

if (is.na(py)) {
  stop("No se encontró Python relativo a getwd() (buscando '.mamba_root/envs/py311/python.exe').\n",
       "Directorio actual: ", getwd())
}

# Forzar ese intérprete
Sys.unsetenv(c("RETICULATE_CONDAENV","RETICULATE_PYTHON_FALLBACK"))
Sys.setenv(RETICULATE_PYTHON = py)
use_python(py, required = TRUE)

py_config()  # verificación

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#clean enviroment
rm(list = ls()); gc()
file.path(paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))),"data/20241015_out"))

wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
wdpath

envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
envpath

time_before_dedup2<-Sys.time()

#base::load(paste0(wdpath,"data/20241015_out/","3_ndp_2025_05_30.Rdata"))
if (!(Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv"))) {
  file.path(paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))),"data/20241015_out"))
  
  wdpath<-
  paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
  wdpath
  
  envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
  envpath 
  
  base::load(paste0(wdpath,"data/20241015_out/","4_ndp_2025_06_06.Rdata"))

  } else {
    file.path(paste0(getwd(),"/_input"))
    paste0(getwd(),"/_input","/4_ndp_2025_06_06.Rdata")
    base::load(paste0(getwd(),"/_input","/4_ndp_2025_06_06.Rdata.enc"))
  }

time_before_dedup1<-Sys.time()
password <- Sys.getenv("PASS_PPIO")
system(sprintf("7z x path/to/_input/4_ndp_2025_06_06.Rdata.7z.001 -p'%s'", password))

try(rm("HOSP_filter_pl_filt"))
python:         G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe
libpython:      G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python311.dll
pythonhome:     G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311
version:        3.11.5 | packaged by conda-forge | (main, Aug 27 2023, 03:23:48) [MSC v.1936 64 bit (AMD64)]
Architecture:   64bit
numpy:           [NOT FOUND]

NOTE: Python version was forced by RETICULATE_PYTHON
          used (Mb) gc trigger  (Mb) max used  (Mb)
Ncells 1739268 92.9    2609282 139.4  2609282 139.4
Vcells 3212755 24.6    8388608  64.0  5079521  38.8
[1] "G:/My Drive/Alvacast/SISTRAT 2023//data/20241015_out"
[1] "G:/My Drive/Alvacast/SISTRAT 2023//"
[1] "G:/Mi unidad/Alvacast/SISTRAT 2023/"
[1] 127
Code
#https://github.com/rstudio/renv/issues/544
#renv falls back to copying rather than symlinking, which is evidently very slow in this configuration.
renv::settings$use.cache(FALSE)

#only use explicit dependencies (in DESCRIPTION)
renv::settings$snapshot.type("implicit")

#check if rstools is installed
try(installr::install.Rtools(check_r_update=F))

Installing package into ‘G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32’ (as ‘lib’ is unspecified)

Code
if(quarto::quarto_version()<"1.7.29"){
stop("You need to install a recent quarto version")   # la publicada el 28-abr-2025
}

#change repository to CL
local({
  r <- getOption("repos")
  r["CRAN"] <- "https://cran.dcc.uchile.cl/"
  options(repos=r)
})

if(!require(pacman)){install.packages("pacman");require(pacman)}

Cargando paquete requerido: pacman

Code
if(!require(pak)){install.packages("pak");require(pak)}

Cargando paquete requerido: pak

Code
pacman::p_unlock(lib.loc = .libPaths()) #para no tener problemas reinstalando paquetes

No 00LOCK detected in: G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32 No 00LOCK detected in: C:/Program Files/R/R-4.4.1/library

Code
if(Sys.info()["sysname"]=="Windows"){
if (getRversion() != "4.4.1") { stop("Requires R version 4.4.1; Actual: ", getRversion()) }
}

#check docker
check_docker_running <- function() {
  # Try running 'docker info' to check if Docker is running
  system("docker info", intern = TRUE, ignore.stderr = TRUE)
}

install_docker <- function() {
  # Open the Docker Desktop download page in the browser for installation
  browseURL("https://www.docker.com/products/docker-desktop")
}

# Main logic
if (inherits(try(check_docker_running(), silent = TRUE), "try-error")) {
  liftr::install_docker()
} else {
  message("Docker is running.")
}

Warning in system(“docker info”, intern = TRUE, ignore.stderr = TRUE): el comando ejecutado ‘docker info’ tiene el estatus 1

Docker is running.

Code
#Registrar el font
windowsFonts(`Times New Roman` = windowsFont("TT Times New Roman"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PACKAGES#######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

unlink("*_cache", recursive=T)

# ----------------------------------------------------------------------
# 2. Use a single pak::pkg_install() call for most CRAN packages
# ----------------------------------------------------------------------

paks <-
  c(#"git", 
    # To connect to github
    "gh", #interface for  GitHub API from R
    #
    "gitcreds", # manages Git credentials (usernames, passwords, tokens)
    #
    "usethis", # simplifies common project setup tasks for R developers
    # Package to bring packages in development
    "devtools",
    # Package administration
    "renv",
    # To manipulate data
    "knitr", "pander", "DT",
    # Join
    "fuzzyjoin", "RecordLinkage",
    # For tables
    "tidyverse", "janitor",
    # For contingency tables
    "kableExtra",
    # For connections with python
    "reticulate",
    # To manipulate big data
    "polars", "sqldf",
    # To bring big databases
    "nanoparquet",
    # Interface for R and RStudio in R
    "installr", "rmarkdown", "quarto", "yaml", #"rstudioapi",
    # Time handling
    "clock",
    # Combine plots
    "ggpubr",
    # Parallelized iterative processing
    "furrr",
    # Work like a tibble with a data.table database
    "tidytable",
    # Split database into training and testing
    "caret",
    # Impute missing data
    "missRanger", "mice",
    # To modularize tasks
    "job",
    # For PhantomJS install checks
    "webshot"
  )

# dplyr
# janitor
# reshape2
# tidytable
# arrow
# boot
# broom
# car
# caret
# data.table
# DiagrammeR
# DiagrammeRsvg
# dplyr
# epiR
# epitools
# ggplot2
# glue
# htmlwidgets
# knitr
# lubridate
# naniar
# parallel
# polycor
# pROC
# psych
# readr
# rio
# rsvg
# scales
# stringr
# tableone
# rmarkdown
# biostat3
# codebook
# finalfit
# Hmisc
# kableExtra
# knitr
# devtools
# tidyr
# stringi
# stringr
# muhaz
# sqldf
# compareGroups
# survminer
# lubridate
# ggfortify
# car
# fuzzyjoin
# compareGroups
# caret
# job
# htmltools
# nanoparquet
# ggpubr
# polars
# installr
# clock
# pander
# reshape
# mice
# missRanger
# VIM
# withr
# biostat3
# broom
# glue
# finalfit
# purrr
# sf


# pak::pkg_install(paks)

pak::pak_sitrep()
# pak::sysreqs_check_installed(unique(unlist(paks)))
#pak::lockfile_create(unique(unlist(paks)),  "dependencies_duplicates24.lock", dependencies=T)
#pak::lockfile_install("dependencies_duplicates24.lock")
#https://rdrr.io/cran/pak/man/faq.html
#pak::cache_delete()

library(tidytable)

Adjuntando el paquete: ‘tidytable’

The following objects are masked from ‘package:stats’:

dt, filter, lag

The following object is masked from ‘package:base’:

%in%
Code
library(polars)

Warning: package ‘polars’ was built under R version 4.4.3

Code
library(ggplot2)
library(readr)


if (!require("pacman")) install.packages("pacman")

pacman::p_load(
  mexhaz,      # Flexible parametric hazard regression models for survival analysis
  tidyverse,   # Collection of packages for data manipulation, visualization, and more (includes dplyr, ggplot2, tidyr, etc.)
  janitor,     # Simple tools for examining and cleaning dirty data
  tableone,    # Create "Table 1" summaries for descriptive statistics in medical research
  cowplot,     # Streamlined plot theme and plot annotations for ggplot2
  grid,        # Base R package for low-level graphics functions (used for arranging plots)
  rio,         # Simplifies data import/export with a consistent interface
  coin,        # Conditional inference procedures for hypothesis testing
  kableExtra,  # Enhances 'knitr::kable()' for creating complex tables in R Markdown
  epitools,    # Epidemiological tools for data analysis
  relsurv,     # Relative survival analysis for population-based cancer studies
  survminer,   # Survival analysis and visualization based on 'survival' package
  biostat3,    # Biostatistics functions and datasets for teaching and research
  tableone,    # (Repeated) Create descriptive summary tables for clinical research
  popEpi,      # For SMRs and SIR
  metafor,     # For heterogeneity test /Cochrane Q
  parallel,    # Parallel computing (for bootstrap)
  install = T  # Automatically install packages if not already installed
)

# ----------------------------------------------------------------------
# 3. Activate polars code completion (safe to try even if it fails)
# ----------------------------------------------------------------------
try(polars_code_completion_activate())

Using code completion in ‘native’ mode.

Code
# ----------------------------------------------------------------------
# 4. BPMN from GitHub (not on CRAN, so install via devtools if missing)
# ----------------------------------------------------------------------
if (!requireNamespace("bpmn", quietly = TRUE)) {
  devtools::install_github("bergant/bpmn")
}

# ----------------------------------------------------------------------
# 5. PhantomJS Check (use webshot if PhantomJS is missing)
# ----------------------------------------------------------------------
# if (!webshot::is_phantomjs_installed()) {
#   webshot::install_phantomjs()
# }

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#FUNCTIONS######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

copiar_nombres <- function(x,row.names=FALSE,col.names=TRUE,dec=",",...) {
  if(class(try(dplyr::ungroup(x)))[1]=="tbl_df"){
    if(options()$OutDec=="."){
      options(OutDec = dec)
      write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
      options(OutDec = ".")
      return(x)
    } else {
      options(OutDec = ",")
      write.table(format(data.frame(x)),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
      options(OutDec = ",")
      return(x)    
    }
  } else {
    if(options()$OutDec=="."){
      options(OutDec = dec)
      write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
      options(OutDec = ".")
      return(x)
    } else {
      options(OutDec = ",")
      write.table(format(x),"clipboard",sep="\t",row.names=FALSE,col.names=col.names,...)
      options(OutDec = ",")
      return(x)       
    }
  }
}  

#WINDOWS do not restrict memory size
if(.Platform$OS.type == "windows") withAutoprint({
  memory.size()
  memory.size(TRUE)
  memory.limit()
})

Warning: ‘memory.size()’ is no longer supported

Warning: ‘memory.size()’ is no longer supported

Warning: ‘memory.limit()’ is no longer supported

Code
memory.limit(size=56000)

Warning: ‘memory.limit()’ is no longer supported

Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#NAs are replaced with "" in knitr kable
options(knitr.kable.NA = '')

pander::panderOptions('big.mark', ',')
pander::panderOptions('decimal.mark', '.')

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#
#to format rows in bold
format_cells <- function(df, rows ,cols, value = c("italics", "bold", "strikethrough")){

  # select the correct markup
  # one * for italics, two ** for bold
  map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
  markup <- map[value]  

  for (r in rows){
    for(c in cols){

      # Make sure values are not factors
      df[[c]] <- as.character( df[[c]])

      # Update formatting
      df[r, c] <- ifelse(nchar(df[r, c])==0,"",paste0(markup, gsub(" ", "", df[r, c]), markup))
    }
  }

  return(df)
}
#To produce line breaks in messages and warnings
knitr::knit_hooks$set(
   error = function(x, options) {
     paste('\n\n<div class="alert alert-danger" style="font-size: small !important;">',
           gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
           '</div>', sep = '\n')
   },
   warning = function(x, options) {
     paste('\n\n<div class="alert alert-warning" style="font-size: small !important;">',
           gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
           '</div>', sep = '\n')
   },
   message = function(x, options) {
     paste('<div class="message" style="font-size: small !important;">',
           gsub('##', '\n', x),
           '</div>', sep = '\n')
   }
)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

sum_dates <- function(x){
  
  cbind.data.frame(
min= as.Date(min(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01"),
p001= as.Date(quantile(unclass(as.Date(x)), .001, na.rm=T), origin = "1970-01-01"),
p005= as.Date(quantile(unclass(as.Date(x)), .005, na.rm=T), origin = "1970-01-01"),
p025= as.Date(quantile(unclass(as.Date(x)), .025, na.rm=T), origin = "1970-01-01"),
p25= as.Date(quantile(unclass(as.Date(x)), .25, na.rm=T), origin = "1970-01-01"),
p50= as.Date(quantile(unclass(as.Date(x)), .5, na.rm=T), origin = "1970-01-01"),
p75= as.Date(quantile(unclass(as.Date(x)), .75, na.rm=T), origin = "1970-01-01"),
p975= as.Date(quantile(unclass(as.Date(x)), .975, na.rm=T), origin = "1970-01-01"),
p995= as.Date(quantile(unclass(as.Date(x)), .995, na.rm=T), origin = "1970-01-01"),
p999= as.Date(quantile(unclass(as.Date(x)), .999, na.rm=T), origin = "1970-01-01"),
max= as.Date(max(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01")
  )
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

# Define the function adapted for Polars
sum_dates_polars <- function(df, date_col) {
  # Create the list of quantiles
  quantiles <- c(0.001, 0.005, 0.025, 0.25, 0.5, 0.75, 0.975, 0.995, 0.999)
  # Create expressions to calculate min and max
  expr_list <- list(
    pl$col(date_col)$min()$alias("min"),
    pl$col(date_col)$max()$alias("max")
  )
  # Add expressions for quantiles
  for (q in quantiles) {
    expr_list <- append(expr_list, pl$col(date_col)$quantile(q)$alias(paste0("p", sub("\\.", "", as.character(q)))))
  }
  # Apply the expressions and return a DataFrame with the results
  df$select(expr_list)
}

# Custom function for sampling with a seed
sample_n_with_seed <- function(data, size, seed) {
  set.seed(seed)
  dplyr::sample_n(data, size)
}

# Function to get the most frequent value 
most_frequent <- function(x) { 
  uniq_vals <- unique(x)
  freq_vals <- sapply(uniq_vals, function(val) sum(x == val))
  most_freq <- uniq_vals[which(freq_vals == max(freq_vals))]
  
  if (length(most_freq) == 1) {
    return(most_freq)
  } else {
    return(NA)
  }
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#CONFIG #######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

options(scipen=2) #display numbers rather scientific number

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

# Define the function first
#oins these values with semicolons and optionally truncates the result if it exceeds a specified width.
toString2 <- function(x, width = NULL, ...) {
    string <- paste(x, collapse = "; ")
    if (missing(width) || is.null(width) || width == 0) 
        return(string)
    if (width < 0) 
        stop("'width' must be positive")
    if (nchar(string, type = "w") > width) {
        width <- max(6, width)
        string <- paste0(substr(string, 1, width - 3), "...")
    }
    string
}




extract_fit <- function(modelo) {
  logLik_value <- modelo$loglik
  # n_params <- attr(logLik_value, "df")
  # n_obs <- length(modelo$y)
  n_params <- modelo$n.par
  n_obs <- modelo$n.obs #no sé si mejor n.obs.tot
  convergence_code <- modelo$code # Check convergence status (0, 1, or 2)
  
  aic <- 2 * modelo$n.par - 2 * modelo$loglik
  bic <- log(modelo$n.obs) * modelo$n.par - 2 * modelo$loglik
  
  data.frame(
    LogLikelihood = as.numeric(logLik_value),
    NumParameters = n_params,
    NumObservations = n_obs,
    AIC = aic,
    BIC = bic,
    convergence = convergence_code
  )
}
calculate_smr_orig = function(data) {
  data|>
    summarise(
      Observed = sum(observed),
      Expected = sum(expected)
    )|>
    rowwise()|>
    mutate(
      SMR = Observed / Expected,
      lo = biostat3::poisson.ci(Observed, Expected)[1],
      up = biostat3::poisson.ci(Observed, Expected)[2]
    )|>
    ungroup()
}
calculate_smr = function(data) {
  data|>
    summarise(
      Observed = sum(observed, na.rm=T),
      Expected = sum(expected, na.rm=T)
    )|>
    rowwise()|>
    mutate(
      SMR = Observed / Expected,
      lo = biostat3::poisson.ci(Observed, Expected)[1],
      up = biostat3::poisson.ci(Observed, Expected)[2]
    )|>
    ungroup()
}

calculate_smr_alt <- function(data) {
  data|>
    summarise(
      Observed = sum(observed),
      Expected = sum(expected)
    )|>
    rowwise()|>
    mutate(
      # Reemplazar Expected == 0 con un valor pequeño
      Expected = ifelse(Expected == 0, 1e-5, Expected),
      Observed = ifelse(Observed == 0, 1e-5, Observed),
      SMR = Observed / Expected,
      lo = biostat3::poisson.ci(Observed, Expected)[1],
      up = biostat3::poisson.ci(Observed, Expected)[2]
    )|>
    ungroup()
}


theme_sjPlot_manual <- function() {
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(color = "gray80"),
    panel.grid.minor = element_line(color = "gray90")
  )
}
summarize_numerical_tt <- function(df, var) {
  
  df <- as_tidytable(df)[!is.na(status)]   # drop rows with missing status
  
  # ── 1. summaries by status ──────────────────────────────────────
  by_status <- df %>% 
    tidytable::summarise(
      Total  = .N,
      Mean   = sprintf("%.1f", mean(get(var), na.rm = TRUE)),
      SD     = sprintf("%.1f", sd(get(var),  na.rm = TRUE)),
      Median = sprintf("%.1f", median(get(var), na.rm = TRUE)),
      IQR    = sprintf("%.1f", IQR(get(var),   na.rm = TRUE)),
      Min    = sprintf("%.1f", min(get(var),   na.rm = TRUE)),
      Max    = sprintf("%.1f", max(get(var),   na.rm = TRUE)),
      pres   = sprintf("%.1f [%.1f-%.1f]",
                       median(get(var), na.rm = TRUE),
                       quantile(get(var), .25, na.rm = TRUE),
                       quantile(get(var), .75, na.rm = TRUE)),
      .by = status
    )|> tidytable::mutate(status= as.character(status))
  
  # ── 2. overall (Total) row ──────────────────────────────────────
  overall <- df %>% 
    tidytable::summarise(
      Total  = .N,
      Mean   = sprintf("%.1f", mean(get(var), na.rm = TRUE)),
      SD     = sprintf("%.1f", sd(get(var),  na.rm = TRUE)),
      Median = sprintf("%.1f", median(get(var), na.rm = TRUE)),
      IQR    = sprintf("%.1f", IQR(get(var),   na.rm = TRUE)),
      Min    = sprintf("%.1f", min(get(var),   na.rm = TRUE)),
      Max    = sprintf("%.1f", max(get(var),   na.rm = TRUE)),
      pres   = sprintf("%.1f [%.1f-%.1f]",
                       median(get(var), na.rm = TRUE),
                       quantile(get(var), .25, na.rm = TRUE),
                       quantile(get(var), .75, na.rm = TRUE))
    ) %>% 
    mutate(status = "Total")
  
  # ── 3. bind and return ──────────────────────────────────────────
  bind_rows(by_status, overall) |>
    dplyr::mutate(status = factor(status, levels = c("Total", "0", "1"))) |>
    arrange(status)
}
summarize_categorical_tt <- function(.data, var) {
  var  <- rlang::as_name(rlang::ensym(var))    # make sure it’s a plain string
  tbl  <- as_tidytable(.data)                  # guarantee tidytable class
  
  ## ── counts per status ──────────────────────────────────────────────
  tab <- tbl %>% 
    tidytable::summarise(
      n   = .N,
      .by = c("status", var)                  # fast group-by in tidytable
    ) %>% 
    pivot_wider(
      names_from  = status,
      values_from = n,
      values_fill = 0
    ) %>% 
    mutate(Total = rowSums(across(where(is.numeric))))
  
  ## ── build “count (pct)” strings ────────────────────────────────────
  status_cols <- setdiff(names(tab), c(var, "Total"))
  status_cols <- sort(status_cols, na.last = TRUE)  # ensure "0" then "1", etc.
  pct_cols    <- c("Total", status_cols)
  
  tab <- tab %>% 
    tidytable::mutate(
      across(
        all_of(pct_cols),
        \(x) paste0(x, " (", sprintf("%.1f", x / sum(x) * 100), ")"),
        .names = "{.col}_pct"
      )
    )
  
  ## ── keep only the pct columns in required order ────────────────────
  tab %>% 
    tidytable::select(all_of(c(var, "Total_pct", paste0(status_cols, "_pct"))))
}
#SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv
# treatment modality
# substance use at treatment entry
# initial treatment outcome
# days in treatment
# age at treatment entry
# year of treatment initiation


#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

year_fraction <- function(dates) {
  dates <- as.Date(dates)  # Ensure input is Date class
  
  years <- as.numeric(format(dates, "%Y"))
  day_of_year <- as.numeric(format(dates, "%j"))
  
  # Function to check leap year
  is_leap_year <- function(year) {
    (year %% 4 == 0 & year %% 100 != 0) | (year %% 400 == 0)
  }
  
  days_in_year <- ifelse(is_leap_year(years), 366, 365)
  
  years + (day_of_year - 1) / days_in_year
}
cmr_ci_phi <- function(observed, pyrs,
                   phi = 1,           # escala de sobre-dispersión
                   conf.level = 0.95,
                   factor = 1000) {

  # Validación básica
  if (any(is.na(observed) | is.na(pyrs)))
    stop("NAs no permitidos; limpia antes los datos.")
  if (any(observed < 0 | pyrs <= 0))
    stop("observed ≥ 0 y pyrs > 0 son obligatorios.")

  z <- qnorm(1 - (1 - conf.level) / 2)

  # CMR
  cmr <- observed / pyrs * factor

  # IC
  ci_low  <- ci_high <- numeric(length(observed))

  zeros <- observed == 0
  if (any(zeros)) {
    # Límite gamma exacto cuando hay cero muertes
    ci_low[zeros]  <- 0
    ci_high[zeros] <- qgamma(conf.level, shape = 1,
                             rate  = pyrs[zeros]) * factor
  }
  if (any(!zeros)) {
    # Log-normal con sobre-dispersión
    se_log <- sqrt(phi / observed[!zeros])
    ci_low[!zeros]  <- cmr[!zeros] * exp(-z * se_log)
    ci_high[!zeros] <- cmr[!zeros] * exp( z * se_log)
  }

  data.frame(CMR = cmr,
             CI_low = ci_low,
             CI_high = ci_high)
}

invisible("using sir output")
sir_ci_phi_improved <- function(sir_obj, phi, conf.level = 0.95) {
  #Método log-normal, the best, dont overestimate, or subestimate variance
  # extract totals
  total_obs <- sir_obj$observed
  total_exp <- sir_obj$expected
  
  # Calculate SEs
  theta <- total_obs / total_exp
    # Normal approximation, n>20  
  # Corrected SEs (McCullagh & Nelder, 1989)
  # “For ratios of Poisson means (such as SIR or CMR), the appropriate approach is to use multinomial or binomial models conditioned on the total observed.”
  # Breslow NE, Day NE. Statistical Methods in Cancer Research, Vol. II (IARC, 1987), §2.2. – Derives the same SE formula and recommends inflating by φ in the presence of heterogeneity.
  z <- qnorm(1 - (1 - conf.level)/2)

  se_log <- sqrt(phi / total_obs)  # Valid formula
  
  # ICs
  lci <- theta * exp(-z * se_log)
  uci <- theta * exp(z * se_log)
  
  data.frame(
    SIR = theta,
    CI_low = lci,
    CI_high = uci,
    phi_used = phi
  )
}
invisible("using glmdf")
sir_ci_phi_improved2 <- function(observed, expected, phi = 1, conf.level = 0.95) {
    z <- qnorm(1 - (1 - conf.level)/2)
    theta <- observed / expected
    
    # SE en escala logarítmica con ajuste φ
    se_log <- sqrt(phi / observed)
    
    # ICs multiplicativos (correctos para ratios)
    lci <- theta * exp(-z * se_log)
    uci <- theta * exp(z * se_log)
    
    list(
        SIR = theta,
        CI_low = lci,
        CI_high = uci
    )
}

#We estimated the dispersion parameter (φ) for each subgroup by fitting an intercept-only quasi-Poisson model. This method assumes no major unmeasured heterogeneity within subgroups. While this may slightly overestimate dispersion due to residual confounding, it yields conservative confidence intervals that maintain the nominal coverage (Breslow and Day, 1987)
sir_cmr_subgroup <- function(df, group_var, phi = NULL) {

    sir_ci_phi_row <- function(observed, expected, phi) {
      sir_ci_phi_improved(
        list(observed = observed, expected = expected),
        phi
      )
    }
    
    # 1. Compute summary statistics by group
    group_stats <- df %>%
        dplyr::group_by(dplyr::across(dplyr::all_of(group_var))) %>%
        dplyr::summarise(
            observed = sum(from0to1, na.rm = TRUE),
            pyrs = sum(pyrs, na.rm = TRUE),
            expected = sum(expected, na.rm = TRUE),
            .groups = "drop"
        ) %>%
        dplyr::mutate(
            sir = observed / expected,
            ear = (observed - expected) / pyrs * 1000
        )
    
    # 2. Compute dispersion parameter (phi) for each group if not provided
    if (is.null(phi)) {
        phi_vals <- df %>%
            dplyr::group_by(dplyr::across(dplyr::all_of(group_var))) %>%
            dplyr::group_modify(~ {
                # Skip groups with no events
                if (sum(.x$from0to1, na.rm = TRUE) == 0) {
                    return(data.frame(phi = NA_real_))
                }
                
                # Fit quasi-Poisson model
                mod <- tryCatch(
                    stats::glm(
                        from0to1 ~ 1,
                        family = quasipoisson,
                        offset = log(expected),
                        data = .x
                    ),
                    error = function(e) NULL
                )
                
                if (is.null(mod)) return(data.frame(phi = NA_real_))
                data.frame(phi = summary(mod)$dispersion)
            }) %>%
            dplyr::ungroup()
        
        group_stats <- dplyr::left_join(group_stats, phi_vals, by = group_var)
    } else {
        group_stats$phi <- phi
    }
    
    # 3. Compute confidence intervals
    result <- group_stats %>%
        dplyr::mutate(
            # Compute CMR with CI
            cmr_ci = purrr::pmap(
                list(observed, pyrs, phi),
                function(obs, py, p) {
                    cmr_ci_phi(obs, py, p)
                }
            ),
            # Compute SIR with CI
            sir_ci = purrr::pmap(
                list(observed, expected, phi),
                function(obs, exp, p) {
                    sir_ci_phi_improved2(obs, exp, p)
                }
            )
        ) %>%
        dplyr::mutate(
            # Format CMR values
            CMR_1000 = purrr::map_chr(cmr_ci, ~ sprintf(
                "%.1f (%.1f-%.1f)", .x$CMR, .x$CI_low, .x$CI_high
            )),
            # Format SIR values
            SMR = purrr::map_chr(sir_ci, ~ sprintf(
                "%.6f (%.6f-%.6f)", .x$SIR, .x$CI_low, .x$CI_high
            ))
        )
    
    # 4. Final formatting - AHORA INCLUYE PHI
    result %>%
        dplyr::mutate(
            expected = round(expected, 0),
            pyrs = round(pyrs, 0),
            EAR = sprintf("%.2f", ear),
            # Formatear phi con 3 decimales
            phi_fmt =  phi
        ) %>%
        dplyr::select(
            dplyr::all_of(group_var), 
            observed, 
            pyrs, 
            CMR_1000, 
            expected, 
            SMR, 
            EAR,
            phi = phi_fmt  # Nueva columna con phi formateado
        )
}

#2025-06-16
#For indirect SMRs (log(expected))
extract_phi <- function(df) {
  df_glm <- df %>%
    left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
    mutate(expected = pyrs * haz)
  
  # Modelo con variables de estratificación
  model_poisson <- glm(
    from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
    family = poisson,
    offset = log(expected),
    data = df_glm
  )
  
  # Cálculo robusto de φ
  pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
  df_residual <- df.residual(model_poisson)
  dispersion_index <- pearson_chisq / df_residual
  
  return(dispersion_index)
}

#etract phi for a directly standardized mortality rates 
extract_phi_dsr <- function(df) {
  df_glm <- df %>%
    left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
    mutate(expected = pyrs * haz)
  
  # model with stratification variables
  model_poisson <- glm(
    from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
    family = poisson,
    offset = log(pyrs),
    data = df_glm
  )
  
  # Cálculo robusto de φ
  pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
  df_residual <- df.residual(model_poisson)
  dispersion_index <- pearson_chisq / df_residual
  
  return(dispersion_index)
}

#Fay & Feuer (1997). Confidence intervals for directly standardized rates: a 
#method based on the gamma distribution. Stat Med 16:791-801.
dsr_format <- function(rate, se, phi = 1, factor = 1e3, digits = 2, conf = 0.95) {
  z <- qnorm(1 - (1 - conf)/2)
  sprintf(paste0("%.", digits, "f (%.", digits, "f–%.", digits, "f)"),
          rate*factor,
          pmax(0, (rate - z*se*sqrt(phi))*factor),
          (rate + z*se*sqrt(phi))*factor)
}
dsr_format_corr <- function(rate, se, phi = 1,
                       factor = 1e4,       # multiplica la tasa (p. ej. ×100 000)
                       digits = 2,         # decimales a mostrar
                       conf   = 0.95) {    # nivel de confianza
  z  <- qnorm(1 - (1 - conf) / 2)
  se <- se * sqrt(phi)                     # sobredispersión
  se_log <- ifelse(rate > 0, se / rate, NA)
  
  L <- rate * exp(-z * se_log)
  U <- rate * exp( z * se_log)
  
  sprintf(paste0("%.", digits, "f (%.", digits, "f–%.", digits, "f)"),
          rate * factor, L * factor, U * factor)
}

extract_spline_data <- function(x) {
  if (is.null(x$spline.seq.A)) stop('No splines found.')
  
  plotdim <- as.numeric(c(!is.null(x$spline.seq.A),
                          !is.null(x$spline.seq.B),
                          !is.null(x$spline.seq.C)))
  splines <- c('spline.seq.A', 'spline.seq.B', 'spline.seq.C')[1:sum(plotdim)]
  ests <- gsub("seq", "est", splines)
  
  library(tidyr)
  library(dplyr)
  
  all_data <- lapply(seq_along(splines), function(i) {
    spline_name <- splines[i]
    est_name <- ests[i]
    
    # Extract spline sequence and estimates
    spline_seq <- x[[spline_name]]
    est_df <- x[[est_name]]
    
    # Check if est_df has a factor/level column (usually first column)
    if (ncol(est_df) >= 4) {
      # Assuming first column is level, next columns are estimate, lower CI, upper CI
      df <- data.frame(
        spline = x$spline[i],
        spline_value = spline_seq,
        level = est_df[,1],
        estimate = est_df[,2],
        lower_ci = est_df[,3],
        upper_ci = est_df[,4]
      )
    } else {
      # If no levels, just estimates and CIs
      df <- data.frame(
        spline = x$spline[i],
        spline_value = spline_seq,
        estimate = est_df[,1],
        lower_ci = est_df[,2],
        upper_ci = est_df[,3]
      )
    }
    return(df)
  })
  
  # Combine all spline data into one data frame
  combined_df <- bind_rows(all_data)
  
  return(combined_df)
}

hetero <- function(rrr1, lcl1, ucl1, rrr2, lcl2, ucl2) {
    # Calcular varianzas en escala logarítmica
    z <- qnorm(0.975)
    var1 <- ((log(ucl1) - log(rrr1)) / z)^2
    var2 <- ((log(ucl2) - log(rrr2)) / z)^2
    
    # Estimación agrupada
    log_rrr1 <- log(rrr1)
    log_rrr2 <- log(rrr2)
    pooled_log <- (log_rrr1/var1 + log_rrr2/var2) / (1/var1 + 1/var2)
    pooled <- exp(pooled_log)
    
    # Cochran's Q
    q_val <- ((log_rrr1 - pooled_log)^2)/var1 + ((log_rrr2 - pooled_log)^2)/var2
    p_val <- pchisq(q_val, df = 1, lower.tail = FALSE)
    
    # Test de interacción de Altman
    d_log <- log_rrr1 - log_rrr2
    se1 <- (log(ucl1) - log(lcl1)) / (2*z)
    se2 <- (log(ucl2) - log(lcl2)) / (2*z)
    se_d <- sqrt(se1^2 + se2^2)
    z_test <- d_log / se_d
    p_inter <- 2 * pnorm(-abs(z_test))
    
    # Intervalos de confianza
    ci_diff <- c(d_log - z*se_d, d_log + z*se_d)
    ci_ratio <- exp(ci_diff)
    
    # Salida con formato idéntico al de Stata
    cat("First RRR (95% CI): ", sprintf("%4.2f (%4.2f, %4.2f)\n", rrr1, lcl1, ucl1))
    cat("Second RRR (95% CI): ", sprintf("%4.2f (%4.2f, %4.2f)\n\n", rrr2, lcl2, ucl2))
    cat("Pooled RRR =", sprintf("%4.2f\n", pooled))
    cat("Cochran's Q =", sprintf("%4.2f", q_val), " p-value =", sprintf("%5.4f\n\n", p_val))
    
    cat("Altman test for interaction\n\n")
    cat("Diff in log RRRs:", sprintf("%5.3f\n", d_log))
    cat("95% CI for diff: (", sprintf("%4.3f, %4.3f)\n", ci_diff[1], ci_diff[2]))
    cat("Test of interaction: z =", sprintf("%5.3f", z_test), " p =", sprintf("%5.4f\n", p_inter))
    cat("Ratio of estimates, 95% CI:", sprintf("%4.2f (%4.2f, %4.2f)\n", 
                                               exp(d_log), ci_ratio[1], ci_ratio[2]))
}
pairwise_smr_test <- function(smrs, lowers, uppers, alpha = 0.05) {
  # Validate inputs
  stopifnot(
    length(smrs) == length(lowers),
    length(smrs) == length(uppers),
    alpha > 0, alpha < 1
  )
  
  # Calculate SEs from 95% CIs (FIXED confidence level)
  # Original data uses 95% CIs -> use z = 1.96 regardless of 'alpha'
  z_ci <- qnorm(0.975)  # 1.96 for 95% CI
  ses <- (uppers - lowers) / (2 * z_ci)
  
  # Generate pairwise comparisons
  n <- length(smrs)
  group_names <- paste("Group", 1:n)
  comparisons <- combn(n, 2, simplify = FALSE)
  
  # Calculate differences, SEs, z-scores, p-values
  results <- lapply(comparisons, function(pair) {
    i <- pair[1]
    j <- pair[2]
    
    data.frame(
      group1 = group_names[i],
      group2 = group_names[j],
      smr1 = smrs[i],
      smr2 = smrs[j],
      difference = smrs[i] - smrs[j],
      se_diff = sqrt(ses[i]^2 + ses[j]^2)
    )
  }) |> do.call(what = rbind)
  
  # Add z-scores and p-values
  results$z <- abs(results$difference) / results$se_diff
  results$p_unadj <- 2 * pnorm(-results$z)
  
  # Apply Holm-Bonferroni correction
  results$p_holm <- p.adjust(results$p_unadj, method = "holm")
  
  # Significance labels
  results$significance <- cut(
    results$p_holm,
    breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
    labels = c("***", "**", "*", "NS")
  )
  
  return(results)
}
Error in contrib.url(repos, "source") : 
  trying to use CRAN without setting a mirror
* pak version:
- 0.8.0.1
* Version information:
- pak platform: x86_64-w64-mingw32 (current: x86_64-w64-mingw32, compatible)
- pak repository: - (local install?)
* Optional packages installed:
- pillar
* Library path:
- G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32
- C:/Program Files/R/R-4.4.1/library
* pak is installed at G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32/pak.
* Dependency versions:
- callr      3.7.6      
- cli        3.6.2      
- curl       5.2.1      
- desc       1.4.3      
- filelock   1.0.3      
- jsonlite   1.8.8      
- lpSolve    5.6.23.9000
- pkgbuild   1.4.4      
- pkgcache   2.2.2.9000 
- pkgdepends 0.7.2.9000 
- pkgsearch  3.1.3.9000 
- processx   3.8.4      
- ps         1.7.6      
- R6         2.5.1      
- zip        2.3.1      
* Dependencies can be loaded
> memory.size()
[1] Inf
> memory.size(TRUE)
[1] Inf
> memory.limit()
[1] Inf
[1] Inf


Note

Loads and cleans databases of substance use disorder treatments and mortality in Chile, generating adjusted mortality rates and standardized mortality ratios. Additionally, calculates net survival using survival analysis techniques and compares observed mortality with that expected according to age, year, and sex.

Import & format database

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel<-
SISTRAT23_c1_2010_2022_df_prev1q[, c("rn",
        "hash_key",
        "adm_age_rec2",#"adm_yr_rec",
        "birth_date_rec",#"birth_date_rec_imp",
        "adm_date_rec2",#"adm_date_rec",
        "adm_date_num_rec2",#"adm_date_rec_num",
        "TABLE",#"TABLE_rec",
        "dit_rec6",#"dit_rec",
        "disch_date_num_rec6",#"disch_date_num",
        "disch_date_rec6",
        "tr_compliance_rec3",#motivo_de_egreso",
        "primary_sub",#"sustancia_principal",
        "second_sub1",#"sustancia_principal",
        "second_sub2",#"sustancia_principal",
        "second_sub3",#"sustancia_principal",
        "sub_dep_icd10_status",#"diagnostico_trs_consumo_sustancia",
        "sex_rec", #"sexo",
        "municipallity_res_cutpre18",#"comuna_residencia",
        "region_del_centro",#
        "evaluacion_del_proceso_terapeutico",
        "edad_inicio_consumo",
        "ed_attainment",
        "plan_type"# treatment modality
)]

We counted the amount of duplicate records in terms of HASH and admission age, and then we discard.

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel|>
  janitor::get_dupes(hash_key, adm_age_rec2)|>  nrow()
rows_with_dupes<-SISTRAT23_c1_2010_2022_df_prev1q_sel|>
  janitor::get_dupes(hash_key, adm_age_rec2)|> 
  pull(rn)

SISTRAT23_c1_2010_2022_df_prev1q_sel2<-SISTRAT23_c1_2010_2022_df_prev1q_sel|>
  (\(df) {
    nrow(df)->>before_disc_dup_hash_age_adm_nrow
    cat(paste0("1.Number of cases before discarding duplicates in admission age and hash key: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("1.Number of patients before discarding duplicates in admission age and hash key: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
  cat(paste0("Records with unavailable missing days in treatment (eg., currently in treatment): ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |> 
      filter(is.na(years_in_tr)) |> nrow(), "\n"));
    cat(paste0("Records with negative days in treatment: ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |> 
      filter(years_in_tr<0)|>  nrow(), "\n")) ;
    cat(paste0("Records with more than 3 years in treatment: ",df |> mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year")) |> 
      filter(years_in_tr>3)|> nrow(), "\n")) 
  df
  })()|>
  mutate(years_in_tr= time_length(interval(adm_date_rec2, disch_date_rec6), unit="year"))|>
  filter(!is.na(years_in_tr)|years_in_tr>=0|years_in_tr<=3)|> 
  group_by(hash_key, adm_age_rec2)|> 
  slice_max(dit_rec6)|> 
  ungroup()|> 
  (\(df) {
    nrow(df)->>after_disc_dup_hash_age_adm_nrow
    cat(paste0("1.Number of cases after discarding duplicates in admission age and hash key and validating days in treatment: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("1.Number of patients after discarding duplicates in admission age and hash key and validating days in treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    df
  })()

cat("Discarded rows\n")
before_disc_dup_hash_age_adm_nrow-after_disc_dup_hash_age_adm_nrow #before 18-06 there was  150,019 , p= 106,283 as a result in this step
[1] 54
1.Number of cases before discarding duplicates in admission age and hash key: 150,046 
1.Number of patients before discarding duplicates in admission age and hash key: 106,283 
Records with unavailable missing days in treatment (eg., currently in treatment): 4007
Records with negative days in treatment: 8
Records with more than 3 years in treatment: 1039
1.Number of cases after discarding duplicates in admission age and hash key and validating days in treatment: 146,012 
1.Number of patients after discarding duplicates in admission age and hash key and validating days in treatment: 103,612 
Discarded rows
[1] 4034

At this date, i only have data until 2020. I also joined mortlaity database with SENDA treatments. We selected records of treatments between 2010 and 2020, of patients admitted between 18 and 64 years old.

Code
cat("Make the death date\n")
mortality$death_date <- 
  as.Date(paste0(mortality$ano_def, "-", 
                 sprintf("%02.0f",mortality$mes_def), "-", 
                 mortality$dia_def))

cat("Maximum death date available:\n")
max(mortality$death_date, na.rm=T)
#[1] "2020-12-31"

SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel2 |>
  tidytable::left_join(mortality[,c("hashkey", "death_date")], by=c("hash_key"="hashkey"), multiple="first") |>
  tidytable::mutate(status=ifelse(is.na(death_date), 0, 1))

SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv <- as_tidytable(SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv)


SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv|>
  (\(df) {
    cat(paste0("3a. Before discarding cases, cases, 2010-2019, first treatment: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("3a. Before discarding cases, patients, 2010-2019, first treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df)->> before_df3a_nrow
    df
  })() |> 
  tidytable::arrange(hash_key, adm_age_rec2)|>
  #treatments between 2010 and 2020
  tidytable::filter(adm_date_rec2>="2010-01-01", adm_date_rec2<"2020-12-31")|> 
  tidytable::group_by(hash_key) |>
  tidytable::mutate(tto= tidytable::row_number())|>
  tidytable::slice_min(tto)|>
  tidytable::ungroup()|>
  #admission ages between 18-65
  tidytable::filter(adm_age_rec2>=18, adm_age_rec2<65)|> 
  tidytable::mutate(post_ttos=ifelse(tto>1, 1, 0))|> 
  (\(df) {
    cat(paste0("3a.Number of cases, 2010-2019, first treatment: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("3a.Number of patients, 2010-2019, first treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df)->> after_df3a_nrow
    df
  })()

cat("Discarded records:\n")
before_df3a_nrow-after_df3a_nrow
Make the death date
Maximum death date available:
[1] "2020-12-31"
3a. Before discarding cases, cases, 2010-2019, first treatment: 146,014 
3a. Before discarding cases, patients, 2010-2019, first treatment: 103,612 
3a.Number of cases, 2010-2019, first treatment: 88,774 
3a.Number of patients, 2010-2019, first treatment: 88,774 
Discarded records:
[1] 57240

We construct the database of sensitivity analyses, count previous treatments, but i get the last treatment(3b)

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel2_surv|>
  (\(df) {
    cat(paste0("3b. Before discarding cases, cases, 2010-2019, last treatment: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("3b. Before discarding cases, patients, 2010-2019, last treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df)->> before_df3b_nrow
    df
  })() |>   
  #tidytable::filter(TABLE<2021)|>
  tidytable::arrange(hash_key, adm_age_rec2)|>
  #treatments between 2010 and 2020
  tidytable::filter(adm_date_rec2>="2010-01-01", adm_date_rec2<"2020-12-31")|> 
  tidytable::group_by(hash_key) |>
  tidytable::mutate(tto= tidytable::row_number())|>
  tidytable::slice_max(tto)|>
  tidytable::ungroup()|>
  #admission ages between 18-65 (85,763 - 84,502)= 84.897
  tidytable::filter(adm_age_rec2>=18, adm_age_rec2<65)|> 
  tidytable::mutate(prev_ttos=ifelse(tto>1, 1, 0))|> 
  (\(df) {
    cat(paste0("3b.Number of cases, 2010-2019, last (LVCF) treatment: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("3b.Number of patients, 2010-2019, last (LVCF) treatment: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df)->> after_df3b_nrow
    df
  })()


cat("Discarded records:\n")
before_df3b_nrow-after_df3b_nrow
3b. Before discarding cases, cases, 2010-2019, last treatment: 146,014 
3b. Before discarding cases, patients, 2010-2019, last treatment: 103,612 
3b.Number of cases, 2010-2019, last (LVCF) treatment: 88,725 
3b.Number of patients, 2010-2019, last (LVCF) treatment: 88,725 
Discarded records:
[1] 57289

Discarded ongoing treatments (truncated, death or currently in treatment, o referrals to teratments outside SENDA network).

Code
days_years<- 365.2425

SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv|>
  (\(df) {
    nrow(df)->>df4a_nrow_pre
    cat(paste0("Discarded (death, no tr. compliance), cases(4a): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
    cat(paste0("Discarded (death, no tr. compliance), patients(4a): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
  df
    })()|>
  tidytable::filter(!is.na(tr_compliance_rec3) & tr_compliance_rec3!="death" & !grepl("truncated|currently|referral", tr_compliance_rec3))|>
  # calculamos la edad al egreso
  tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
  tidytable::mutate(timesurv= tidytable::case_when(
    status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
    status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
  tidytable::mutate(death_date_rec= tidytable::case_when(
    status==1~ death_date,
    status==0~ as.Date("2020-12-31")))|>
  tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|> 
  (\(df) {
    cat(paste0("4a.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("4a.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df) ->>df4a_nrow_post
    df
  })()
cat("Discarded records:\n")
df4a_nrow_pre-df4a_nrow_post

cat("Records were excluded for patients who had not yet been discharged")
 paste0(round(((df4a_nrow_pre-df4a_nrow_post)/222945)*100,1),"%")
Discarded (death, no tr. compliance), cases(4a): 14,304 
Discarded (death, no tr. compliance), patients(4a): 14,304 
4a.Number of cases: 74,470 
4a.Number of patients: 74,470 
Discarded records:
[1] 14304
Records were excluded for patients who had not yet been discharged[1] "6.4%"

We replicated for thae database of the last recorded treatments.

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel3b_surv|>
  (\(df) {
    cat(paste0("Discarded (death, no tr. compliance), cases(4b): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
    cat(paste0("Discarded (death, no tr. compliance), patients(4b): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
    nrow(df) ->>df4b_nrow_pre
    df
  })()|>
  tidytable::filter(!is.na(tr_compliance_rec3) & tr_compliance_rec3!="death" & !grepl("truncated|currently|referral", tr_compliance_rec3))|>  # calculamos la edad al egreso
  tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
  tidytable::mutate(death_date_rec= tidytable::case_when(
    status==1~ death_date,
    status==0~ as.Date("2020-12-31")))|>
   tidytable::mutate(timesurv= tidytable::case_when(
    status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
    status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
  tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|> 
  (\(df) {
    cat(paste0("4b.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("4b.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df) ->>df4b_nrow_post
    df
  })()

cat("Discarded records:\n")
df4b_nrow_pre-df4b_nrow_post

cat("Records were excluded for patients who had not yet been discharged")
 paste0(round(((df4b_nrow_pre-df4b_nrow_post)/222945)*100,1),"%")
Discarded (death, no tr. compliance), cases(4b): 11,340 
Discarded (death, no tr. compliance), patients(4b): 11,340 
4b.Number of cases: 77,385 
4b.Number of patients: 77,385 
Discarded records:
[1] 11340
Records were excluded for patients who had not yet been discharged[1] "5.1%"
Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv<-
  SISTRAT23_c1_2010_2022_df_prev1q_sel3a_surv|>
  (\(df) {
    nrow(df)->>df4c_nrow_pre
    cat(paste0("Discarded (death, no tr. compliance), cases(4c): ", formatC(nrow(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3))), big.mark=",")),"\n")
    cat(paste0("Discarded (death, no tr. compliance), patients(4c): ", formatC(nrow(distinct(tidytable::filter(df, is.na(tr_compliance_rec3) | tr_compliance_rec3=="death"|grepl("truncated|currently|referral", tr_compliance_rec3)), hash_key)), big.mark=",")),"\n")
  df
    })()|>
  # calculamos la edad al egreso
  tidytable::mutate(disch_age_rec= (dit_rec6/365.241)+adm_age_rec2)|>
  tidytable::mutate(timesurv= tidytable::case_when(
    status==1~ time_length(interval(adm_date_rec2, death_date), unit="year"),
    status==0~ time_length(interval(adm_date_rec2, as.Date("2020-12-31")), unit="year")))|>
  tidytable::mutate(death_date_rec= tidytable::case_when(
    status==1~ death_date,
    status==0~ as.Date("2020-12-31")))|>
  tidytable::mutate(death_age_rec= as.integer(timesurv+adm_age_rec2))|> 
  (\(df) {
    cat(paste0("4c.Number of cases: ", formatC(nrow(df), big.mark=",")),"\n")
    cat(paste0("4c.Number of patients: ", formatC(nrow(distinct(df, hash_key)), big.mark=",")),"\n")
    nrow(df) ->>df4c_nrow_post
    df
  })()
cat("Discarded records:\n")
df4c_nrow_pre-df4c_nrow_post

cat("Records were excluded for patients who had not yet been discharged")
 paste0(round(((df4c_nrow_pre-df4c_nrow_post)/222945)*100,1),"%")
Discarded (death, no tr. compliance), cases(4c): 14,304 
Discarded (death, no tr. compliance), patients(4c): 14,304 
4c.Number of cases: 88,774 
4c.Number of patients: 88,774 
Discarded records:
[1] 0
Records were excluded for patients who had not yet been discharged[1] "0%"

Added admission age and year

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2)
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_age_rec2)
#add admission year
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_date_rec2))
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$adm_date_rec2))


SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_age_rec2_int <- as.integer(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_age_rec2)
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$yr_adm <- floor(year(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2))


Bring & format mortality tables

Import and format tables for compatibility

Code
# Importar los archivos desde la ruta base
base_path <- paste0(getwd(),"/_input/")

mltper_1x1 <- try(rio::import(paste0(base_path, "mltper_1x1.txt")))
fltper_1x1 <- try(rio::import(paste0(base_path, "fltper_1x1.txt")))
mltper_5x1 <- try(rio::import(paste0(base_path, "mltper_5x1.txt")))
fltper_5x1 <- try(rio::import(paste0(base_path, "fltper_5x1.txt")))

mltper_1x10 <- try(rio::import(paste0(base_path, "mltper_1x10.txt")))
fltper_1x10 <- try(rio::import(paste0(base_path, "fltper_1x10.txt")))
mltper_5x10 <- try(rio::import(paste0(base_path, "mltper_5x10.txt")))
fltper_5x10 <- try(rio::import(paste0(base_path, "fltper_5x10.txt")))

#filtro para obtener las tasas de los últimos 10 años
mltper_1x10_filt<-mltper_1x10[mltper_1x10$Year=="2010-2019",]
fltper_1x10_filt<-fltper_1x10[fltper_1x10$Year=="2010-2019",]

mltper_5x10_filt<-mltper_5x10[mltper_5x10$Year=="2010-2019",]
fltper_5x10_filt<-fltper_5x10[fltper_5x10$Year=="2010-2019",]


mltper_1x1_filt<-mltper_1x1[between(mltper_1x1$Year,2010,2020),]
fltper_1x1_filt<-fltper_1x1[between(fltper_1x1$Year,2010,2020),]

mltper_5x1_filt<-mltper_5x1[between(mltper_5x1$Year,2010,2020),]
fltper_5x1_filt<-fltper_5x1[between(fltper_5x1$Year,2010,2020),]

#cambiar la edad a entero
mltper_1x10_filt$age_rec <- as.numeric(mltper_1x10_filt$Age)

Warning: NAs introducidos por coerción

Code
fltper_1x10_filt$age_rec <- as.numeric(fltper_1x10_filt$Age)

Warning: NAs introducidos por coerción

Code
mltper_5x1_filt$age_rec <- as.numeric(mltper_5x1_filt$Age)

Warning: NAs introducidos por coerción

Code
fltper_5x1_filt$age_rec <- as.numeric(fltper_5x1_filt$Age)

Warning: NAs introducidos por coerción

Code
mltper_1x10_filt$sex <- "male"
fltper_1x10_filt$sex <- "female"
mltper_5x10_filt$sex <- "male"
fltper_5x10_filt$sex <- "female"
mltper_1x1_filt$sex <- "male"
fltper_1x1_filt$sex <- "female"
mltper_5x1_filt$sex <- "male"
fltper_5x1_filt$sex <- "female"

cons_rate_sex_1x10<-
  rbind.data.frame(mltper_1x10_filt, fltper_1x10_filt)[,c("age_rec","sex", "qx","mx")]
cons_rate_sex_1x10<-cons_rate_sex_1x10[which(!is.na(cons_rate_sex_1x10$age_rec)),]
cons_rate_sex_1x10$lambda_p <- -log( 1 - cons_rate_sex_1x10$qx ) / 365.241

cons_rate_sex_5x10<-
  rbind.data.frame(mltper_5x10_filt, fltper_5x10_filt)[,c("Age","sex", "lx", "qx","mx")]
cons_rate_sex_5x10$lambda_p <- -log( 1 - cons_rate_sex_5x10$qx ) / 365.241
invisible("Capping valores cercanos al 100% muertes")
invisible("No esn ecesario porque not engo esos casos")
cons_rate_sex_5x10<-cons_rate_sex_5x10[which(cons_rate_sex_5x10$Age!="110+"),]

cons_rate_sex_5x1<-
  rbind.data.frame(mltper_5x1_filt, fltper_5x1_filt)[,c("Year","Age","sex", "lx","qx","mx")]
cons_rate_sex_5x1$lambda_p <- -log( 1 - cons_rate_sex_5x1$qx ) / 365.241
invisible("Capping valores cercanos al 100% muertes")
invisible("No esn ecesario porque not engo esos casos")
cons_rate_sex_5x1<-cons_rate_sex_5x1[which(cons_rate_sex_5x1$Age!="110+"),]

Import mortality tables from National Statistics Institute, along with projections of the population.

Code
tablas_de_mortalidad_de_chile_1992_2050_agrupada <- 
  readxl::read_excel(paste0(base_path, "tablas-de-mortalidad-de-chile-1992-2050.xlsx"), 
                     sheet = "BD Tablas de Mortalidad", skip = 1)|> 
  janitor::clean_names()|> 
  dplyr::filter(ano>=2010, ano<=2020)|> 
  dplyr::mutate(edad= readr::parse_number(edad))|> 
  dplyr::filter(edad>=18, edad<65, region=="País")|>  
  dplyr::mutate(edad_anos_rec= dplyr::case_when(edad>=18 & edad<30~1,
                                                edad>=30 & edad<45~2,
                                                edad>=45 & edad<60~3,
                                                edad>=60 & edad<65~4,T~NA_real_))|>  
  dplyr::mutate(edad_anos_rec= factor(edad_anos_rec, levels=1:4, labels= c("18-29","30-44","45-59","60-64")))|>      
  dplyr::group_by(ano, sexo, edad_anos_rec)|> 
  dplyr::summarise(
    total_d_x = sum(d_x, na.rm = TRUE),      # Suma de muertes en el grupo
    total_l_x = sum(l_x, na.rm = TRUE),      # Suma de la población al inicio del grupo (debiese ser con años-persona)
    mean_m_x = mean(m_x),
    mortality_rate_grouped = total_d_x / total_l_x
  )

summarise() has grouped output by ‘ano’, ‘sexo’. You can override using the .groups argument.

Code
proy_ine_com<-
  rio::import("https://www.ine.gob.cl/docs/default-source/proyecciones-de-poblacion/cuadros-estadisticos/base-2017/ine_estimaciones-y-proyecciones-2002-2035_base-2017_comunas0381d25bc2224f51b9770a705a434b74.csv?sfvrsn=b6e930a7_3&download=true")|> 
  tidyr::pivot_longer(cols = dplyr::starts_with("Poblacion"), 
                      names_to = "anio", 
                      values_to = "poblacion")|> 
  dplyr::mutate(anio= gsub("Poblacion ","",anio), anio=as.numeric(anio))|> 
  dplyr::filter(anio>=2010 & anio<=2020)|> 
  dplyr::mutate(edad_anos_rec= dplyr::case_when(Edad>=15 & Edad<30~1,
                                                Edad>=30 & Edad<45~2,
                                                Edad>=45 & Edad<60~3,
                                                Edad>=60 & Edad<65~4,T~NA_real_))|> 
  dplyr:: mutate(edad_cat = dplyr::case_when(
    Edad >= 15 & Edad < 20 ~ "15-19",
    Edad >= 20 & Edad < 25 ~ "20-24",
    Edad >= 25 & Edad < 30 ~ "25-29",
    Edad >= 30 & Edad < 35 ~ "30-34",
    Edad >= 35 & Edad < 40 ~ "35-39",
    Edad >= 40 & Edad < 45 ~ "40-44",
    Edad >= 45 & Edad < 50 ~ "45-49",
    Edad >= 50 & Edad < 55 ~ "50-54",  
    Edad >= 55 & Edad < 60 ~ "55-59",
    Edad >= 60 & Edad < 65 ~ "60-64",
    Edad >= 65 & Edad < 70 ~ "65-69",    
    Edad >= 70 & Edad < 75 ~ "70-74",
    Edad >= 75 & Edad < 80 ~ "75-79",    
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  )) 

proy_ine_reg_group<-
  proy_ine_com|> 
  #2025 le agrego eso para que sepamos que es igual que antes ser2024
  dplyr::filter(Edad>=15, Edad<65)|> 
  #fomateamos para calzarla con la anterior
  dplyr::mutate(reg_res= sprintf("%02d", Region))|> 
  dplyr::group_by(reg_res, `Sexo (1=Hombre 2=Mujer)`,edad_anos_rec, anio)|> 
  dplyr::mutate(edad_anos_rec= factor(edad_anos_rec, levels=1:4, labels= c("15-29","30-44","45-59","60-65")))|>     
  dplyr::summarise(poblacion= sum(poblacion, na.rm=T))

summarise() has grouped output by ‘reg_res’, ‘Sexo (1=Hombre 2=Mujer)’, ‘edad_anos_rec’. You can override using the .groups argument.

Using relsurv package to combine UN mortality databases of females and males.

Code
rt<-
  relsurv::transrate.hmd(paste0(base_path, "mltper_1x1.txt"), 
                         paste0(base_path, "fltper_1x1.txt"))

# Supongamos que `rt` es tu objeto ratetable
# Convertir las dimensiones en vectores
ages <- attr(rt, "dimnames")$age  # Edad
years <- attr(rt, "dimnames")$year  # Año
sexes <- attr(rt, "dimnames")$sex  # Sexo

# Extraer los valores del ratetable
values <- as.vector(rt)  # Convierte el array en un vector plano

# Crear un dataframe con todas las combinaciones de las dimensiones
popmort1x1 <- expand.grid(age = ages,year = years,sex = sexes)

# Añadir los valores al dataframe
popmort1x1 <- popmort1x1 %>%
  mutate(rate = values*365.241) #introduje esto el 10-01-2025
Code
mx_1x1<-
  rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1), 
                   cbind.data.frame(sex="female", fltper_1x1))[,c("Year", "sex", "Age", "mx", "qx")]
mx_1x1$Age<- as.numeric(mx_1x1$Age)

Warning: NAs introducidos por coerción

Code
years_followup<-
  range(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm)[1]:range(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$yr_adm)[2]

mx_1x1_filt<-mx_1x1[as.numeric(as.character(mx_1x1$Year)) %in% years_followup,]
mx_1x1_filt2<-mx_1x1_filt[as.numeric(as.character(mx_1x1_filt$Age)) %in% min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]

mx_1x1_filt2$lambda_p_days <- -log( 1 - mx_1x1_filt2$qx ) / 365.241
warning(paste0("El mx es rate de HMS pero multiplicado por 365.41"))

Warning: El mx es rate de HMS pero multiplicado por 365.41

Code
mx_1x1_filt2$lambda_p_yrs <- -log( 1 - mx_1x1_filt2$qx )


mx_1x1_comp<-
  rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1), 
                   cbind.data.frame(sex="female", fltper_1x1))
mx_1x1_comp$Age<- as.numeric(mx_1x1_comp$Age)

Warning: NAs introducidos por coerción

Code
mx_1x1_comp_filt<-mx_1x1_comp[as.numeric(as.character(mx_1x1_comp$Year)) %in% years_followup,]
mx_1x1_comp_filt2<-mx_1x1_comp_filt[as.numeric(as.character(mx_1x1_comp_filt$Age)) %in% min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]

Population pyramid

Code
#18-29, 30-44, 45-59, 60-64
senda_2010_2020_2015<- 
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |> 
mutate(disch_age_rec_cat = dplyr::case_when(
  disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
  disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
  disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
  disch_age_rec >= 60 & disch_age_rec < 76 ~ "60-75",  #2025-06-13: MODIFIED TO AMPLIFY AGES,AND DID IT WITH AGE AT DISCAHRGE
  TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
))|>
  group_by(sex_rec, disch_age_rec_cat)|> 
  summarise(n=n())|> 
  ungroup() |> 
  filter(!is.na(disch_age_rec_cat))

summarise() has grouped output by ‘sex_rec’. You can override using the .groups argument.

Code
mort_2015<-
  mortality|> 
  filter(ano_def==2015)|> 
  mutate(adm_age_cat = dplyr::case_when(
    edad_cant >= 18 & edad_cant < 30 ~ "18-29",
    edad_cant >= 30 & edad_cant < 45 ~ "30-44",
    edad_cant >= 45 & edad_cant < 60 ~ "45-59",
    edad_cant >= 60 & edad_cant < 76 ~ "60-75",     #2025-06-13: MODIFIED TO AMPLIFY AGES
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))|>
  group_by(sexo, adm_age_cat)|> 
  summarise(n=n())|> 
  ungroup()|> 
  mutate(sex_rec=if_else(sexo==1,"male","female"))

summarise() has grouped output by ‘sexo’. You can override using the .groups argument.

Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Create a basic bar chart for one gender
basic_plot <-  ggplot(
  senda_2010_2020_2015, 
  aes(
    x = disch_age_rec_cat, 
    fill = sex_rec, 
    y = ifelse(
      test = sex_rec == "male", 
      yes = -n, 
      no = n
    )
  )
) + 
  geom_bar(stat = "identity") 

population_pyramid <- basic_plot +
  scale_y_continuous(
    labels = abs,
    limits = max(subset(senda_2010_2020_2015, !is.na(sex_rec)& !is.na(disch_age_rec_cat))$n) * c(-1,1)
  ) +
  coord_flip() +
  theme_minimal() +
  # Change scale_color_manual to scale_fill_manual
  scale_fill_manual(
    name = "Sex", # Change "Group" to "Sex" to match your labs()
    values = c(
      "female" = "#A6CEE3",
      "male" = "#1F78B4"
    ),
    na.translate = FALSE # Prevents NA from showing in legend if applicable
  ) +
  labs(
    x = "Age",
    y = "Population",
    fill = "Sex", # This matches the aesthetic and your new scale_fill_manual
    title = "SENDA population"
  )

tablas_de_mortalidad_de_chile_1992_2050_agrupada <- 
  readxl::read_excel(paste0(base_path, "tablas-de-mortalidad-de-chile-1992-2050.xlsx"), 
                     sheet = "BD Tablas de Mortalidad", skip = 1)|> 
  janitor::clean_names()|> 
  dplyr::filter(ano>=2010, ano<=2020)|> 
  dplyr::mutate(edad= readr::parse_number(edad))|> 
  dplyr::filter(edad>=18, edad<65, region=="País")|>  
  dplyr::mutate(edad_anos_rec= dplyr::case_when(edad>=18 & edad<30~1,
                                                edad>=30 & edad<45~2,
                                                edad>=45 & edad<60~3,
                                                edad>=60 & edad<76~4,T~NA_real_))|>  
  dplyr::mutate(adm_age_cat= factor(edad_anos_rec, levels=1:4, labels= c("18-29","30-44","45-59","60-75")))|> 
  mutate(sex_rec=if_else(sexo=="Hombre","male","female"))|> 
  dplyr::group_by(sex_rec, adm_age_cat)|> 
  dplyr::summarise(
    total_d_x = sum(d_x, na.rm = TRUE),      # Suma de muertes en el grupo
    total_l_x = sum(l_x, na.rm = TRUE),      # Suma de la población al inicio del grupo
    mean_m_x = mean(m_x),
    mortality_rate_grouped = total_d_x / total_l_x
  )

summarise() has grouped output by ‘sex_rec’. You can override using the .groups argument.

Code
basic_plot2 <-  ggplot(
  tablas_de_mortalidad_de_chile_1992_2050_agrupada, 
  aes(
    x = adm_age_cat, 
    fill = sex_rec, 
    y = ifelse(
      test = sex_rec == "male", 
      yes = -total_l_x, 
      no = total_l_x
    )
  )
) + 
  geom_bar(stat = "identity") 

population_pyramid2 <- basic_plot2 +
  scale_y_continuous(
    labels = abs,
    limits = max(subset(tablas_de_mortalidad_de_chile_1992_2050_agrupada, !is.na(sex_rec))$total_l_x) * c(-1,1)
  ) +
  coord_flip() +
  theme_minimal() +
  # Change scale_color_manual to scale_fill_manual
  scale_fill_manual(
    name = "Sex", # Change "Group" to "Sex" to match your labs()
    values = c(
      "female" = "#A6CEE3",
      "male" = "#1F78B4"
    ),
    na.translate = FALSE # Prevents NA from showing in legend if applicable
  ) +
  labs(
    x = "Age",
    y = "Population",
    fill = "Sex", # This matches the aesthetic and your new scale_fill_manual
    title = "Chilean population"
  )

# Create a basic bar chart for one gender
basic_plot <-  ggplot(
  senda_2010_2020_2015, 
  aes(
    x = disch_age_rec_cat, 
    fill = sex_rec, 
    y = ifelse(
      test = sex_rec == "male", 
      yes = -n, 
      no = n
    )
  )
) + 
  geom_bar(stat = "identity") 

plot_grid(population_pyramid+ theme(legend.position="none")+scale_y_continuous(
  labels = function(x) {
    scales::number_format(scale = 1e-6, suffix = "M")(abs(x))
  }), population_pyramid2+ labs(x=NULL)+ theme(axis.text.y = element_blank())+ scale_y_continuous(
  labels = function(x) {
    scales::number_format(scale = 1e-6, suffix = "M")(abs(x))
  }), ncol = 2)

Scale for y is already present. Adding another scale for y, which will replace the existing scale. Scale for y is already present. Adding another scale for y, which will replace the existing scale.

Code
ggsave(paste0(getwd(),"/_figs/pyramid.png"), dpi = 600, width = 9)

Saving 9 x 5 in image

Focused on treatment database

Given that the variable would be useful, we imported age of onset of substance use and combined different databases and get the mean to replace inconsistencies across records for each patient.

Code
CONS_C2_2324 <- c2_2324 %>% rename(edad_inicio = edad_inicio_sustancia_inicial, HASH_KEY=hashkey)
CONS_C2 <- CONS_C2 %>% rename(edad_inicio = edad_inicio_sustancia_inicial)
CONS_C4 <- CONS_C4 %>% rename(edad_inicio = edaddeiniciosustanciainicia)
CONS_C5 <- CONS_C5 %>% rename(edad_inicio = edad_inicio_sustancia_inicial)
CONS_C6 <- CONS_C6 %>% rename(edad_inicio = edaddeiniciosustanciainicia)
SISTRAT23_c1_2010_2022_df_prev1f <- SISTRAT23_c1_2010_2022_df_prev1f %>% 
  rename(edad_inicio = edad_inicio_consumo, HASH_KEY=hash_key)
SISTRAT23_c1_2023_2024_df2 <- SISTRAT23_c1_2023_2024_df2 %>% 
  rename(edad_inicio = edad_inicio_consumo, HASH_KEY=hash_key)

bases <- list(
  CONS_C2 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  CONS_C2_2324 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  CONS_C4 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  CONS_C5 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  CONS_C6 %>% dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  SISTRAT23_c1_2010_2022_df_prev1f %>%
    dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio)),
  SISTRAT23_c1_2023_2024_df2 %>%
    dplyr::select(HASH_KEY, edad_inicio) %>%
    dplyr::mutate(edad_inicio = as.numeric(edad_inicio))
)

Warning: There was 1 warning in dplyr::mutate(). ℹ In argument: edad_inicio = as.numeric(edad_inicio). Caused by warning: ! NAs introducidos por coerción There was 1 warning in dplyr::mutate(). ℹ In argument: edad_inicio = as.numeric(edad_inicio). Caused by warning: ! NAs introducidos por coerción

Code
# Unir todas las bases
edad_unificada <- bind_rows(bases)

# Calcular el promedio de edad por HASH_KEY
promedios_edades <- edad_unificada %>%
  group_by(HASH_KEY) %>%
  summarise(promedio_edad = mean(edad_inicio, na.rm = TRUE))
Code
pip install -U hf-transfer transformers kernels torch
invalid syntax (<string>, line 1)

We enhanced the dataset by categorizing admission age into meaningful groups and converting birth, admission, discharge, and death dates into continuous fractional year formats. Additionally, we created a macrozone variable based on regional groupings to facilitate the analysis of mortality distribution differences across geographic areas. The primary substance of concern was recoded into licit (alcohol) versus illicit (all other substances). Treatment compliance status was simplified into completed versus not completed categories, and the treatment plan type was classified as residential or ambulatory. Finally, we incorporated average age at substance use onset, merging it from an external dataset and using it to impute missing values in the onset age variable.

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv<- 
  SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv|> 
  mutate(adm_age_cat = dplyr::case_when(
    adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
    adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
    adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
    adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))|>
  filter(!is.na(adm_age_rec2))|> 
  mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec), 
         yr_fr_adm_date= year_fraction(adm_date_rec2), 
         yr_fr_disch_date= year_fraction(disch_date_rec6), 
         yr_fr_death_date_rec= year_fraction(death_date_rec))|> 
  mutate(macrozone = case_when(
    region_del_centro  %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
    region_del_centro  %in% c("de coquimbo", "de valparaiso")~ "2.Center",
    region_del_centro  %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
    region_del_centro  %in% c("de la araucania  ", "de los rios", "de los lagos") ~ "4.South",
    region_del_centro  %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
    TRUE ~ "Metropolitan"  # En caso de que algún código no esté especificado
  ))|>
  mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
  mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|> 
  mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|> 
  mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
  tidytable::select(-any_of(c("promedio_edad")))|> 
  left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|> 
  mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))

We replicated the actions for the database for sensitivity analysis.

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv<- 
  SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv|> 
  mutate(adm_age_cat = dplyr::case_when(
    adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
    adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
    adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
    adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))|>
  filter(!is.na(adm_age_rec2))|> 
  mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec), 
         yr_fr_adm_date= year_fraction(adm_date_rec2), 
         yr_fr_disch_date= year_fraction(disch_date_rec6), 
         yr_fr_death_date_rec= year_fraction(death_date_rec))|> 
  mutate(macrozone = case_when(
    region_del_centro  %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
    region_del_centro  %in% c("de coquimbo", "de valparaiso")~ "2.Center",
    region_del_centro  %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
    region_del_centro  %in% c("de la araucania  ", "de los rios", "de los lagos") ~ "4.South",
    region_del_centro  %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
    TRUE ~ "Metropolitan"  # En caso de que algún código no esté especificado
  ))|>
  mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
  mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|> 
  mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|> 
  mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
  tidytable::select(-any_of(c("promedio_edad")))|> 
  left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|> 
  mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))

2|

Code
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv<- 
  SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv|> 
  mutate(adm_age_cat = dplyr::case_when(
    adm_age_rec2 >= 18 & adm_age_rec2 < 30 ~ "18-29",
    adm_age_rec2 >= 30 & adm_age_rec2 < 45 ~ "30-44",
    adm_age_rec2 >= 45 & adm_age_rec2 < 60 ~ "45-59",
    adm_age_rec2 >= 60 & adm_age_rec2 < 65 ~ "60-64",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))|>
  filter(!is.na(adm_age_rec2))|> 
  mutate(yr_fr_birth_date_rec= year_fraction(birth_date_rec), 
         yr_fr_adm_date= year_fraction(adm_date_rec2), 
         yr_fr_disch_date= year_fraction(disch_date_rec6), 
         yr_fr_death_date_rec= year_fraction(death_date_rec))|> 
  mutate(macrozone = case_when(
    region_del_centro  %in% c("de arica y parinacota", "de tarapaca", "de antofagasta", "de atacama") ~ "1.North",
    region_del_centro  %in% c("de coquimbo", "de valparaiso")~ "2.Center",
    region_del_centro  %in% c("del libertador general bernardo ohiggins", "del maule", "del bio-bio") ~ "3.South-center",
    region_del_centro  %in% c("de la araucania  ", "de los rios", "de los lagos") ~ "4.South",
    region_del_centro  %in% c("de magallanes y la antartica chilena", "aysen") ~ "5.Austral",
    TRUE ~ "Metropolitan"  # En caso de que algún código no esté especificado
  ))|>
  mutate(prim_sub_licit=ifelse(primary_sub=="alcohol","licit","illicit"))|>
  mutate(tr_compliance_status= case_when(grepl("completion", tr_compliance_rec3)~ "Completed", grepl("dropout|discharge", tr_compliance_rec3)~ "Not completed"))|> 
  mutate(rm_norm= ifelse(macrozone=="Metropolitan",1,0))|> 
  mutate(res_plan= ifelse(grepl("pr", plan_type),1,0))|>
  tidytable::select(-any_of(c("promedio_edad")))|> 
  left_join( promedios_edades, by=c("hash_key"="HASH_KEY"), multiple="first")|> 
  mutate(sub_use_onset= case_when(promedio_edad>4~ promedio_edad, T~edad_inicio_consumo))
Code
# List of categorical variables
categorical_vars <- c("prim_sub_licit", "adm_age_cat", "res_plan", "sex_rec", "sub_dep_icd10_status",
                      "macrozone", "tr_compliance_status")

# List of numerical variables
numerical_vars <- c("adm_age_rec2", 
                    "dit_rec6", 
                    "death_age_rec",
                    "yr_fr_birth_date_rec", 
                    "yr_fr_adm_date",
                    "yr_fr_disch_date")

If we want to check observations by groups

Code
xtabs(~tr_compliance_status+ prim_sub_licit+ res_plan+ sex_rec+ adm_age_cat, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)

# , , res_plan = 1, sex_rec = female, adm_age_cat = 60-65
# Sólo 2 en not complited e ilicit

xtabs(~status+ tr_compliance_status+ prim_sub_licit+ res_plan+ sex_rec+ adm_age_cat, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)

#, , prim_sub_licit = illicit, res_plan = 1, sex_rec = female, adm_age_cat = 60-65
#, , prim_sub_licit = illicit, res_plan = 1, sex_rec = male, adm_age_cat = 60-65
#no hay mujeres ni hombres que mueran por ilícitas de 60-65

#tiene pero pocos mueren, sin importar si completan o no, en residenciales, sustancias lícitas (alcohol), 18-29
#prim_sub_licit = licit, res_plan = 1, sex_rec = male, adm_age_cat = 18-29
#prim_sub_licit = licit, res_plan = 1, sex_rec = female, adm_age_cat = 18-29

xtabs(~sex_rec+ adm_age_cat+ yr_adm, data= SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv)

Survival format

Check followup period

Code
with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv, summary(as.numeric(death_date_rec - disch_date_rec6)))

with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv, summary(as.numeric(death_date_rec - disch_date_rec6)))

with(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv, summary(as.numeric(death_date_rec - disch_date_rec6)))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -4016     855    1694    1730    2582    4007 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -4016     626    1401    1489    2284    4000 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -4016     822    1660    1708    2558    4007 

Check birthdate

Code
round(min(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_fr_birth_date_rec),0)
round(max(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv$yr_fr_birth_date_rec),0)
[1] 1946
[1] 2003

Identify negative followup period

Code
fot_years <- as.numeric((SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$death_date_rec - SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$disch_date_rec6) / 365.25)
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv, 
           difftime(death_date_rec, disch_date_rec6, unit="days"))/ 365.25)
)


table(fot_years<0)


neg_tr_d<- table(as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv, 
                      difftime(death_date_rec, disch_date_rec6, unit="days"))/ 365.25)
      <0)

cat("Negative days in treatment\n")
paste0(round((as.numeric(table(fot_years<0)[2])/222945)*100,1),"%")
paste0(round((neg_tr_d[[2]]/222945)*100,1),"%")

cat("Treatments over 3 years\n")
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv, 
           difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)
)
table(as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv, 
                      difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)>3)
cat("Treatments over 3 years\n")
psych::describe(
as.numeric(with(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv, 
           difftime(disch_date_rec6, adm_date_rec2, unit="days"))/ 365.25)
)
   vars     n mean   sd median trimmed  mad min   max range skew kurtosis   se
X1    1 77385 4.08 2.96   3.84    3.97 3.33 -11 10.95 21.95 0.25    -0.73 0.01

FALSE  TRUE 
70290  4180 
Negative days in treatment
[1] "1.9%"
[1] "2.8%"
Treatments over 3 years
   vars     n mean   sd median trimmed  mad   min  max range skew kurtosis se
X1    1 74470 0.63 0.55   0.48    0.55 0.38 -0.94 7.51  8.45 2.33     9.85  0

FALSE  TRUE 
74037   433 
Treatments over 3 years
   vars     n mean   sd median trimmed  mad   min  max range skew kurtosis se
X1    1 74470 0.63 0.55   0.48    0.55 0.38 -0.94 7.51  8.45 2.33     9.85  0

We discard missing values in sex, discharge and death dates and negative follow-up periods.

Code
disch_after_cens_death<- 
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>nrow()-
SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>
  filter(!is.na(disch_date_rec6),
               !is.na(death_date_rec),
               disch_date_rec6 < death_date_rec,     # evita seguimiento negativo
               !is.na(sex_rec)) |> nrow()
disch_after_cens_death
paste0(round((disch_after_cens_death/222945)*100,1),"%")
[1] 4256
[1] "1.9%"

We coded the variable drug dependence (ICD-10) and translated, as well as sex, and added the variable age at discharge coded by groups (18-29, 30-44, 45-59, 60+).

Code
clean_df <- SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv |>
  (\(df) {
    cat("Before discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
  filter(!is.na(disch_date_rec6),
         !is.na(death_date_rec),
         disch_date_rec6 < death_date_rec,     # evita seguimiento negativo
         adm_date_rec2  < disch_date_rec6,     # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a on días tto neg
         !is.na(sex_rec)) |>                   # grupos de agregación sin NA
  (\(df) {
    cat("After discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
mutate(
    # sub_dep_icd10_status: reference = "Hazardous consumption"
    sub_dep_icd10_status = factor(
      sub_dep_icd10_status,
      levels = c("Hazardous consumption", "Drug dependence")
    ),
    # sex_rec: reference = "Male"  (rename from lower-case if needed)
    sex_rec = case_when(                 # optional renaming step
                sex_rec == "male"   ~ "Male",
                sex_rec == "female" ~ "Female",
                TRUE                ~ sex_rec) %>%
              factor(levels = c("Male", "Female"))
  )|>
  mutate(disch_age_cat = dplyr::case_when(
    disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
    disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
    disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
    disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))

cat("Number of rows after cleaning the database:\n")
nrow(clean_df)
After discarding missing or discharge dates 
Before discarding missing or discharge dates 
[1] 74470
[1] 70064
Number of rows after cleaning the database:
[1] 70064

We formatted the database in survival setting (clean_df_corr_surv).

Code
clean_df_corr <- clean_df %>% 
  mutate(
    year_death = year(death_date_rec),
    # age_death  = as.numeric(difftime(death_date_rec, birth_date_rec,
    #                                  units = "days")) / 365.25#/ 365.2425#365.25
    #age_death= interval(birth_date_rec, death_date_rec)|>as.numeric('years')
    age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
  )

# 2.  Construct 'per' (2010-2021) similar to lexpand()
#     (intervals closed at left, open at right)
breaks_vec <- seq(as.Date("2010-01-01"), as.Date("2022-01-01"), by = "year")

clean_df_corr <- clean_df_corr %>% 
  mutate(
    per = cut(
      death_date_rec,
      breaks = breaks_vec,
      right  = FALSE,            # [2010, 2011)
      labels = 2010:2021
    ) %>% as.integer()
  )

# 3.  Label exclusion criteria
clean_df_corr <- clean_df_corr %>%
  mutate(
    excl_reason = case_when(
      status != 1                         ~ "alive / censored",
      !is.na(per) & age_death < 76        ~ "included",
      is.na(per)    & age_death >= 76     ~ "age≥76 & year≥2022",
      is.na(per)                          ~ "year≥2022",
      age_death >= 76                     ~ "age≥76",
      TRUE                                ~ "otro"
    )
  )

# 4.  Subgroups
muertes_per2021   <- clean_df_corr %>% filter(status == 1, per == 2021)
muertes_fuera_per <- clean_df_corr %>% filter(status == 1, is.na(per))          # ≥ 2022
muertes_age65plus <- clean_df_corr %>% filter(status == 1, age_death >= 76)

excluidos <- clean_df_corr %>%
  filter(status == 1, excl_reason != "incluido") %>%
  dplyr::select(rn, hash_key, death_date_rec, age_death, excl_reason)

cat("Maximum age at discharge:\n")
max(excluidos$age_death)
#[1] 74.42574

# 5.  Summary of exclusions
# clean_df_corr %>%
#   filter(status == 1) %>%
#   count(excl_reason, name = "n") %>%
#   arrange(desc(n))

start_fup <- as.Date("2010-01-01")
end_fup   <- as.Date("2020-12-31")

pyrs_raw <- clean_df_corr %>%
  ## Exclusion criteria of SIR function ------------------
mutate(
  age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
) %>%
  filter(
    ## Only people in SIR function
    (is.na(age_death) | age_death < 76),          # top age 75
    death_date_rec >= start_fup | is.na(death_date_rec), 
    disch_date_rec6 <= end_fup                    # entry ≤ 31-dic-2021
  ) %>%
  ## Exit date definiton (death/censorship) ----------------------
mutate(
  exit_date = coalesce(death_date_rec, end_fup),      # Alive → 31-dic-2021
  exit_date = pmin(exit_date, end_fup),               # Cut deaths after 2021
  follow_up_days = as.numeric(exit_date - disch_date_rec6),
  pyrs= time_length(interval(disch_date_rec6, exit_date), unit="year")
) %>%
  (\(df) {
    cat("Deaths \n")
    print(janitor::tabyl(df,status))
    cat("Number of rows \n")
    print(nrow(df))
    df ->>clean_df_corr_surv
  })() |> 
  summarise(total_pyrs = sum(pyrs, na.rm = TRUE)) %>%
  pull()

cat("Number of total person-years:\n")
pyrs_raw   # debería ~ 353826
cat("Difference in person-years, manual vs. SIR:\n")
pyrs_raw-353826

paste0(round(((pyrs_raw-353826)/353826)*100,3),"% of the PYs")

cat("Person-years (for article)\n")
psych::describe(clean_df_corr_surv$pyrs)
Maximum age at discharge:
[1] 74.42623
Deaths 
 status     n   percent
      0 67068 0.9572391
      1  2996 0.0427609
Number of rows 
[1] 70064
Number of total person-years:
[1] 353843
Difference in person-years, manual vs. SIR:
[1] 16.99475
[1] "0.005% of the PYs"
Person-years (for article)
   vars     n mean  sd median trimmed  mad min   max range skew kurtosis   se
X1    1 70064 5.05 2.8    4.9    4.99 3.29   0 10.97 10.97 0.16    -0.98 0.01
Code
cat("Maximum age at death\n")
clean_df_corr_surv|>  mutate(edad_salida = as.numeric((pmin(death_date_rec, disch_date_rec6 + follow_up_days) - birth_date_rec) / 365.2425)) |>
  summarise(max_age = max(edad_salida, na.rm = TRUE))

invisible("Trying to format it similar to lexpand function")

biostat3::survRate(Surv(pyrs, status==1)~1, data=clean_df_corr_surv)|>  
  dplyr::mutate(across(c("rate", "lower", "upper"),~sprintf("%1.1f",.*1000)))


sr_poredadsexanio <- biostat3::survRate(Surv(pyrs, status==1) ~ strata(sex_rec)+ 
                                      strata(agegroup)+ strata(year), data= clean_df_corr_surv|> 
                                      mutate(agegroup = cut(
                                        disch_age_rec,                   # la variable de edad
                                        breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
                                        right  = FALSE,                 # intervalo izquierdo cerrado  [15–30)
                                        labels = c("18-29", "30-44", "45-59", "60+"),
                                        include.lowest = TRUE           # 15 entra en el primer tramo
                                      ), 
                                      year= lubridate::year(as.Date(disch_date_num_rec6))
                                      )|> 
                                        filter(disch_age_rec>17, disch_age_rec<64)
)
Maximum age at death
   max_age
1 75.62921
   tstop event rate lower upper
1 353843  2996  8.5   8.2   8.8

We replicate the formatting of variables (translation and coding of discharge date) for patients in the last treatment.

Code
#Table SXX. All-cause mortality rate and standardized mortality rate for 
#patients who accessed SUD treatment by sex and age group, last treatment available

cat(paste0("QWhy the additional ", 3029-2996), " deaths in the last treatment? \n")

clean_df_b <- SISTRAT23_c1_2010_2022_df_prev1q_sel4b_surv |>
  (\(df) {
    cat("Before discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
  filter(!is.na(disch_date_rec6),
         !is.na(death_date_rec),
         disch_date_rec6 < death_date_rec,     # evita seguimiento negativo
         adm_date_rec2  < disch_date_rec6,     # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a con días tto neg
         !is.na(sex_rec)) |>                   # grupos de agregación sin NA
  (\(df) {
    cat("After discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
  mutate(
    # sub_dep_icd10_status: reference = "Hazardous consumption"
    sub_dep_icd10_status = factor(
      sub_dep_icd10_status,
      levels = c("Hazardous consumption", "Drug dependence")
    ),
    
    # sex_rec: reference = "Male"  (rename from lower-case if needed)
    sex_rec = case_when(                 # optional renaming step
      sex_rec == "male"   ~ "Male",
      sex_rec == "female" ~ "Female",
      TRUE                ~ sex_rec) %>%
      factor(levels = c("Male", "Female"))
  )|>
  mutate(disch_age_cat = dplyr::case_when(
    disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
    disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
    disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
    disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))
nrow(clean_df_b)
QWhy the additional 33  deaths in the last treatment? 
After discarding missing or discharge dates 
Before discarding missing or discharge dates 
[1] 77385
[1] 70913
[1] 70913
Code
warning(paste0("2025-08-03: ACC warned me that I may leaving deaths out due to being before tratment finishes: ",(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_rec6 > death_date_rec, death_date_rec!="2020-12-31") ) |> nrow(), ", so i need to improve the change in 2025-06-19 to account for these users"))

Warning: 2025-08-03: ACC warned me that I may leaving deaths out due to being before tratment finishes: 369, so i need to improve the change in 2025-06-19 to account for these users

Code
invisible("Completed treatments")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec) |> filter(tr_compliance_status=="Completed") |> nrow()
invisible("17")

invisible("Differences over than one year")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec) |> filter(diff>1)
invisible("80")

# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") ) |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> View()

invisible("58 have admission date greater than death date")
# (SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv |>filter(!is.na(death_date_rec),disch_date_corr > death_date_rec, as.Date(disch_date_corr)<"2021-01-01") )  |> dplyr::select(hash_key, adm_age_rec2, yr_fr_adm_date, yr_fr_disch_date, yr_fr_death_date_rec, tr_compliance_status) |> mutate(diff=yr_fr_disch_date-yr_fr_death_date_rec, diff2=yr_fr_adm_date-yr_fr_death_date_rec) |> filter(diff2>1)

cat("2025-06-19: Add median days in treatment if missing discharge date\n")
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr <- 
  ifelse(is.na(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_rec6),
         SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2+as.numeric(quantile(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$dit_rec6, .5)), SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_rec6)

cat("2025-08-03: Add median days in treatment if missing discharge date, or if its over death date\n")
SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr2 <- 
  ifelse(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr>SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$death_date_rec & !is.na(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$death_date_rec)& as.Date(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr)<"2021-01-01",
         SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$adm_date_rec2+as.numeric(quantile(SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$dit_rec6, .5)), SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv$disch_date_corr)


clean_df_c <- SISTRAT23_c1_2010_2022_df_prev1q_sel4c_surv|>
  mutate(disch_date_corr= as.Date(disch_date_corr2))|> #changed at 25-08-03
  (\(df) {
    cat("Before discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
  filter(!is.na(disch_date_corr),
         !is.na(death_date_rec),
         disch_date_corr <= death_date_rec,     # evita seguimiento negativo  #2025-08-03: changed censorship to allow more deceased to enter
         adm_date_rec2  < disch_date_corr,     # para sacar 831d9f7b2771ce2701ae4a4417f26e70f2e2acb21d21c6fe7ff1e766c9792d3a on días tto neg
         !is.na(sex_rec)) |>                   # grupos de agregación sin NA
  (\(df) {
    cat("After discarding missing or discharge dates \n")
    print(nrow(df))    
    df
  })() |> 
mutate(
    # sub_dep_icd10_status: reference = "Hazardous consumption"
    sub_dep_icd10_status = factor(
      sub_dep_icd10_status,
      levels = c("Hazardous consumption", "Drug dependence")
    ),
    # sex_rec: reference = "Male"  (rename from lower-case if needed)
    sex_rec = case_when(                 # optional renaming step
                sex_rec == "male"   ~ "Male",
                sex_rec == "female" ~ "Female",
                TRUE                ~ sex_rec) %>%
              factor(levels = c("Male", "Female"))
  )|>
  mutate(disch_age_cat = dplyr::case_when(
    disch_age_rec >= 18 & disch_age_rec < 30 ~ "18-29",
    disch_age_rec >= 30 & disch_age_rec < 45 ~ "30-44",
    disch_age_rec >= 45 & disch_age_rec < 60 ~ "45-59",
    disch_age_rec >= 60 & disch_age_rec < 86 ~ "60+",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ))

cat("Number of rows after cleaning the database:\n")
nrow(clean_df_c)

cat("2025-08-03: To avoid dropped XX rows where entry == exit\n")
clean_df_c$disch_date_corr <- as.Date(
  ifelse(clean_df_c$disch_date_corr == clean_df_c$death_date_rec,
         as.integer(clean_df_c$disch_date_corr) - 1L,
         as.integer(clean_df_c$disch_date_corr)),
  origin = "1970-01-01"
)


clean_df_c_corr <- clean_df_c%>% 
  mutate(
    year_death = year(death_date_rec),
    # age_death  = as.numeric(difftime(death_date_rec, birth_date_rec,
    #                                  units = "days")) / 365.25#/ 365.2425#365.25
    #age_death= interval(birth_date_rec, death_date_rec)|>as.numeric('years')
    age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
  )

# 2.  Construct 'per' (2010-2021) similar to lexpand()
#     (intervals closed at left, open at right)
breaks_vec <- seq(as.Date("2010-01-01"), as.Date("2021-01-01"), by = "year")

clean_df_c_corr <- clean_df_c_corr %>% 
  mutate(
    per = cut(
      death_date_rec,
      breaks = breaks_vec,
      right  = FALSE,            # [2010, 2011)
      labels = 2010:2020 #modify from 2021 to 2020
    ) %>% as.integer()
  )

# 3.  Label exclusion criteria
clean_df_c_corr <- clean_df_c_corr%>%
  mutate(
    excl_reason = case_when(
      status != 1                         ~ "alive / censored",
      !is.na(per) & age_death < 76        ~ "included",
      is.na(per)    & age_death >= 76     ~ "age≥76 & year≥2022",
      is.na(per)                          ~ "year≥2022",
      age_death >= 76                     ~ "age≥76",
      TRUE                                ~ "otro"
    )
  )

# 4.  Subgroups
muertes_per2021_c   <- clean_df_c_corr %>% filter(status == 1, per == 2021)
muertes_fuera_per_c <- clean_df_c_corr %>% filter(status == 1, is.na(per))          # ≥ 2022
muertes_age65plus_c<- clean_df_c_corr %>% filter(status == 1, age_death >= 76)

excluidos <- clean_df_c_corr %>%
  filter(status == 1, excl_reason != "included") %>%
  dplyr::select(rn, hash_key, death_date_rec, age_death, excl_reason)

cat("Maximum age at discharge:\n")
max(excluidos$age_death, na.rm=T)

Warning in max(excluidos$age_death, na.rm = T): ningun argumento finito para max; retornando -Inf

Code
#[1] 74.42623

# 5.  Summary of exclusions
# clean_df_corr %>%
#   filter(status == 1) %>%
#   count(excl_reason, name = "n") %>%
#   arrange(desc(n))

start_fup_c <- as.Date("2010-01-01")
end_fup_c   <- as.Date("2020-12-31")

pyrs_raw_c <- clean_df_c_corr %>%
  ## Exclusion criteria of SIR function ------------------
mutate(
  age_death= time_length(interval(birth_date_rec, death_date_rec), unit="year")
) %>%
  filter(
    ## Only people in SIR function
    (is.na(age_death) | age_death < 76),          # top age 75
    death_date_rec >= start_fup_c | is.na(death_date_rec), 
    disch_date_rec6 <= end_fup_c                    # entry ≤ 31-dic-2021
  ) %>%
  ## Exit date definiton (death/censorship) ----------------------
mutate(
  exit_date = coalesce(death_date_rec, end_fup_c),      # Alive → 31-dic-2021
  exit_date = pmin(exit_date, end_fup_c),               # Cut deaths after 2021
  follow_up_days = as.numeric(exit_date - disch_date_corr),
  pyrs= time_length(interval(disch_date_corr, exit_date), unit="year")
) %>%
  (\(df) {
    cat("Deaths \n")
    print(janitor::tabyl(df,status))
    cat("Number of rows \n")
    print(nrow(df))
    df ->>clean_df_corr_surv_c
  })() |> 
  summarise(total_pyrs = sum(pyrs, na.rm = TRUE)) %>%
  pull()

cat("Number of total person-years:\n")
pyrs_raw_c   # debería ~ 353826
cat("Difference in person-years, manual vs. SIR:\n")
pyrs_raw_c-4249152

paste0(round(((pyrs_raw_c-4249152)/4249152)*100,3),"% of the PYs")

cat("2025-08-03: Differences with previous database with unfinished treatments but w/o patients deceased before finishing them\n")
pyrs_raw_c-416166
cat("Added patients: 2025-08-03: ");table(clean_df_corr_surv_c$status)[[2]]-3643

cat("Mean pyrs by user: \n")
round((table(clean_df_corr_surv_c$status)[[2]]-3643)/(pyrs_raw_c-416166),2)
2025-06-19: Add median days in treatment if missing discharge date
2025-08-03: Add median days in treatment if missing discharge date, or if its over death date
After discarding missing or discharge dates 
Before discarding missing or discharge dates 
[1] 88774
[1] 83782
Number of rows after cleaning the database:
[1] 83782
2025-08-03: To avoid dropped XX rows where entry == exit
Maximum age at discharge:
[1] -Inf
Deaths 
 status     n    percent
      0 79965 0.95444129
      1  3817 0.04555871
Number of rows 
[1] 83782
Number of total person-years:
[1] 416305.6
Difference in person-years, manual vs. SIR:
[1] -3832846
[1] "-90.203% of the PYs"
2025-08-03: Differences with previous database with unfinished treatments but w/o patients deceased before finishing them
[1] 139.5834
Added patients: 2025-08-03: [1] 174
Mean pyrs by user: 
[1] 1.25


Descriptives

Code
categorical_vars_corr<- 
c(categorical_vars,"disch_age_cat")

# Create summaries for categorical variables
categorical_summaries_cln <- lapply(
  c(categorical_vars_corr),# <- object you iterate over
  summarize_categorical_tt,           # <- function
  .data = clean_df                    # <- extra argument “.data =”
)

Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0. ℹ Please use all_of() or any_of() instead. # Was: data %>% select(var)

# Now: data %>% select(all_of(var))

See https://tidyselect.r-lib.org/reference/faq-external-vector.html.

Code
names(categorical_summaries_cln) <- categorical_vars_corr
# Create summaries for numerical variables
numerical_summaries_cln <- lapply(
  c(numerical_vars, "disch_age_rec"),# <- object you iterate over
  \(v) summarize_numerical_tt(clean_df, v)
)
names(numerical_summaries_cln) <- c(numerical_vars, "disch_age_rec")
Code
table_one_clean <- CreateTableOne(
  vars = c(categorical_vars, numerical_vars,"disch_age_cat", "disch_age_rec"),
  data = clean_df,
  factorVars = c(categorical_vars,"disch_age_cat"),
  strata = "status",
  addOverall = TRUE,  # Incluir totales
  test = FALSE,        # Realizar pruebas de diferencias
  smd = TRUE          # Incluir SMDs
)
table_one_clean_print <- print(table_one_clean, 
                         nonnormal = c(numerical_vars, "disch_age_rec"), 
                         formatOptions = list(big.mark = ","),
                         quote = TRUE, 
                         noSpaces = TRUE,
                         showAllLevels = TRUE,  # Mostrar todos los niveles de variables categóricas
                         missing = FALSE,        # Incluir valores perdidos
                         explain = FALSE,        # Añadir explicación de las variables
                         digits = c(adm_age_rec2 = 1, dit_rec6 = 1, yr_fr_birth_date_rec = 1), smd = TRUE)

table_one_clean_print |> 
  knitr::kable("html", digits=1) |> 
  kableExtra::kable_classic() |> 
  kableExtra::scroll_box(height="400px")
write.table(table_one_clean_print, file = paste0(getwd(),"/_out/table_one_clean.txt"), sep = "\t", row.names = FALSE)
                        "Stratified by status"
 ""                      "level"                
  "n"                    ""                     
  "prim_sub_licit"       "illicit"              
  ""                     "licit"                
  "adm_age_cat"          "18-29"                
  ""                     "30-44"                
  ""                     "45-59"                
  ""                     "60-64"                
  "res_plan"             "0"                    
  ""                     "1"                    
  "sex_rec"              "Male"                 
  ""                     "Female"               
  "sub_dep_icd10_status" "Hazardous consumption"
  ""                     "Drug dependence"      
  "macrozone"            "1.North"              
  ""                     "2.Center"             
  ""                     "3.South-center"       
  ""                     "4.South"              
  ""                     "5.Austral"            
  ""                     "Metropolitan"         
  "tr_compliance_status" "Completed"            
  ""                     "Not completed"        
  "adm_age_rec2"         ""                     
  "dit_rec6"             ""                     
  "death_age_rec"        ""                     
  "yr_fr_birth_date_rec" ""                     
  "yr_fr_adm_date"       ""                     
  "yr_fr_disch_date"     ""                     
  "disch_age_cat"        "18-29"                
  ""                     "30-44"                
  ""                     "45-59"                
  ""                     "60+"                  
  "disch_age_rec"        ""                     
                        "Stratified by status"
 ""                      "Overall"                      
  "n"                    "70,064"                       
  "prim_sub_licit"       "46048 (65.7)"                 
  ""                     "24016 (34.3)"                 
  "adm_age_cat"          "24341 (34.7)"                 
  ""                     "31485 (44.9)"                 
  ""                     "13098 (18.7)"                 
  ""                     "1140 (1.6)"                   
  "res_plan"             "61539 (87.8)"                 
  ""                     "8525 (12.2)"                  
  "sex_rec"              "53331 (76.1)"                 
  ""                     "16733 (23.9)"                 
  "sub_dep_icd10_status" "20248 (28.9)"                 
  ""                     "49816 (71.1)"                 
  "macrozone"            "7988 (11.4)"                  
  ""                     "7310 (10.4)"                  
  ""                     "11943 (17.0)"                 
  ""                     "3332 (4.8)"                   
  ""                     "782 (1.1)"                    
  ""                     "38709 (55.2)"                 
  "tr_compliance_status" "19185 (27.4)"                 
  ""                     "50879 (72.6)"                 
  "adm_age_rec2"         "34.12 [27.43, 42.86]"         
  "dit_rec6"             "165.00 [92.00, 290.00]"       
  "death_age_rec"        "39.00 [33.00, 48.00]"         
  "yr_fr_birth_date_rec" "1,980.99 [1,972.24, 1,987.71]"
  "yr_fr_adm_date"       "2,015.32 [2,013.08, 2,017.50]"
  "yr_fr_disch_date"     "2,015.93 [2,013.66, 2,018.08]"
  "disch_age_cat"        "22977 (32.8)"                 
  ""                     "31881 (45.5)"                 
  ""                     "13739 (19.6)"                 
  ""                     "1467 (2.1)"                   
  "disch_age_rec"        "34.66 [27.94, 43.50]"         
                        "Stratified by status"
 ""                      "0"                            
  "n"                    "67,068"                       
  "prim_sub_licit"       "44794 (66.8)"                 
  ""                     "22274 (33.2)"                 
  "adm_age_cat"          "23896 (35.6)"                 
  ""                     "30287 (45.2)"                 
  ""                     "11895 (17.7)"                 
  ""                     "990 (1.5)"                    
  "res_plan"             "59051 (88.0)"                 
  ""                     "8017 (12.0)"                  
  "sex_rec"              "50935 (75.9)"                 
  ""                     "16133 (24.1)"                 
  "sub_dep_icd10_status" "19545 (29.1)"                 
  ""                     "47523 (70.9)"                 
  "macrozone"            "7695 (11.5)"                  
  ""                     "7085 (10.6)"                  
  ""                     "11433 (17.0)"                 
  ""                     "3167 (4.7)"                   
  ""                     "730 (1.1)"                    
  ""                     "36958 (55.1)"                 
  "tr_compliance_status" "18514 (27.6)"                 
  ""                     "48554 (72.4)"                 
  "adm_age_rec2"         "33.81 [27.27, 42.35]"         
  "dit_rec6"             "166.00 [92.00, 290.00]"       
  "death_age_rec"        "39.00 [33.00, 48.00]"         
  "yr_fr_birth_date_rec" "1,981.34 [1,972.76, 1,987.93]"
  "yr_fr_adm_date"       "2,015.39 [2,013.15, 2,017.55]"
  "yr_fr_disch_date"     "2,016.01 [2,013.73, 2,018.16]"
  "disch_age_cat"        "22566 (33.6)"                 
  ""                     "30699 (45.8)"                 
  ""                     "12526 (18.7)"                 
  ""                     "1277 (1.9)"                   
  "disch_age_rec"        "34.35 [27.77, 43.01]"         
                        "Stratified by status"
 ""                      "1"                             "SMD"  
  "n"                    "2,996"                         ""     
  "prim_sub_licit"       "1254 (41.9)"                   "0.517"
  ""                     "1742 (58.1)"                   ""     
  "adm_age_cat"          "445 (14.9)"                    "0.657"
  ""                     "1198 (40.0)"                   ""     
  ""                     "1203 (40.2)"                   ""     
  ""                     "150 (5.0)"                     ""     
  "res_plan"             "2488 (83.0)"                   "0.143"
  ""                     "508 (17.0)"                    ""     
  "sex_rec"              "2396 (80.0)"                   "0.097"
  ""                     "600 (20.0)"                    ""     
  "sub_dep_icd10_status" "703 (23.5)"                    "0.129"
  ""                     "2293 (76.5)"                   ""     
  "macrozone"            "293 (9.8)"                     "0.139"
  ""                     "225 (7.5)"                     ""     
  ""                     "510 (17.0)"                    ""     
  ""                     "165 (5.5)"                     ""     
  ""                     "52 (1.7)"                      ""     
  ""                     "1751 (58.4)"                   ""     
  "tr_compliance_status" "671 (22.4)"                    "0.120"
  ""                     "2325 (77.6)"                   ""     
  "adm_age_rec2"         "43.41 [34.32, 51.16]"          "0.698"
  "dit_rec6"             "156.00 [91.00, 288.00]"        "0.020"
  "death_age_rec"        "47.00 [38.00, 54.00]"          "0.521"
  "yr_fr_birth_date_rec" "1,970.72 [1,963.02, 1,979.52]" "0.826"
  "yr_fr_adm_date"       "2,013.82 [2,012.02, 2,015.66]" "0.507"
  "yr_fr_disch_date"     "2,014.42 [2,012.50, 2,016.34]" "0.507"
  "disch_age_cat"        "411 (13.7)"                    "0.653"
  ""                     "1182 (39.5)"                   ""     
  ""                     "1213 (40.5)"                   ""     
  ""                     "190 (6.3)"                     ""     
  "disch_age_rec"        "43.99 [34.72, 51.81]"          "0.691"
"level" "Overall" "0" "1" "SMD"
"n" 70,064 67,068 2,996
"prim_sub_licit" illicit 46048 (65.7) 44794 (66.8) 1254 (41.9) 0.517
"" licit 24016 (34.3) 22274 (33.2) 1742 (58.1)
"adm_age_cat" 18-29 24341 (34.7) 23896 (35.6) 445 (14.9) 0.657
"" 30-44 31485 (44.9) 30287 (45.2) 1198 (40.0)
"" 45-59 13098 (18.7) 11895 (17.7) 1203 (40.2)
"" 60-64 1140 (1.6) 990 (1.5) 150 (5.0)
"res_plan" 0 61539 (87.8) 59051 (88.0) 2488 (83.0) 0.143
"" 1 8525 (12.2) 8017 (12.0) 508 (17.0)
"sex_rec" Male 53331 (76.1) 50935 (75.9) 2396 (80.0) 0.097
"" Female 16733 (23.9) 16133 (24.1) 600 (20.0)
"sub_dep_icd10_status" Hazardous consumption 20248 (28.9) 19545 (29.1) 703 (23.5) 0.129
"" Drug dependence 49816 (71.1) 47523 (70.9) 2293 (76.5)
"macrozone" 1.North 7988 (11.4) 7695 (11.5) 293 (9.8) 0.139
"" 2.Center 7310 (10.4) 7085 (10.6) 225 (7.5)
"" 3.South-center 11943 (17.0) 11433 (17.0) 510 (17.0)
"" 4.South 3332 (4.8) 3167 (4.7) 165 (5.5)
"" 5.Austral 782 (1.1) 730 (1.1) 52 (1.7)
"" Metropolitan 38709 (55.2) 36958 (55.1) 1751 (58.4)
"tr_compliance_status" Completed 19185 (27.4) 18514 (27.6) 671 (22.4) 0.120
"" Not completed 50879 (72.6) 48554 (72.4) 2325 (77.6)
"adm_age_rec2" 34.12 [27.43, 42.86] 33.81 [27.27, 42.35] 43.41 [34.32, 51.16] 0.698
"dit_rec6" 165.00 [92.00, 290.00] 166.00 [92.00, 290.00] 156.00 [91.00, 288.00] 0.020
"death_age_rec" 39.00 [33.00, 48.00] 39.00 [33.00, 48.00] 47.00 [38.00, 54.00] 0.521
"yr_fr_birth_date_rec" 1,980.99 [1,972.24, 1,987.71] 1,981.34 [1,972.76, 1,987.93] 1,970.72 [1,963.02, 1,979.52] 0.826
"yr_fr_adm_date" 2,015.32 [2,013.08, 2,017.50] 2,015.39 [2,013.15, 2,017.55] 2,013.82 [2,012.02, 2,015.66] 0.507
"yr_fr_disch_date" 2,015.93 [2,013.66, 2,018.08] 2,016.01 [2,013.73, 2,018.16] 2,014.42 [2,012.50, 2,016.34] 0.507
"disch_age_cat" 18-29 22977 (32.8) 22566 (33.6) 411 (13.7) 0.653
"" 30-44 31881 (45.5) 30699 (45.8) 1182 (39.5)
"" 45-59 13739 (19.6) 12526 (18.7) 1213 (40.5)
"" 60+ 1467 (2.1) 1277 (1.9) 190 (6.3)
"disch_age_rec" 34.66 [27.94, 43.50] 34.35 [27.77, 43.01] 43.99 [34.72, 51.81] 0.691

Lexis format

We generated three derivative datasets to support the mortality analyses. First, c_SISTRAT_c1 expands the original cohort (clean_df) into a Lexis structure with lexpand(), splitting each subject’s follow-up by calendar year (2010–2020) and attained-age bands (18-29, 30-44, 45-59, 60-75 years). Each resulting row holds the person-years (pyrs) and deaths (from0to1) for a unique combination of age group, calendar year and sex—ready for fast calculation of standardized mortality ratios. c_SISTRAT_c1_b repeats the identical procedure on the database of the last followed treatment clean_df_b, providing an audit trail for sensitivity checks. Finally, c_SISTRAT_c1_lex is built with Epi::Lexis(), keeping the data at episode level: entry is the discharge date (calendar time and age), exit is death or censoring, and exit status codes the event. This individual-level object lets us run detailed time-to-event models and plot trajectories, while the aggregated tables handle all rate-based summaries. In 2025-06-19, we added clean_df_c to check the SMRs of patients without finished treatments, by imputing days in treatment for patients with incomplete treatments with the median of days in treatment (i.e., 151 days).

Code
c_SISTRAT_c1 <- lexpand( clean_df, 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_amb <- lexpand(subset(clean_df, res_plan==0), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_res <- lexpand(subset(clean_df, res_plan==1), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_licit <- lexpand(subset(clean_df, prim_sub_licit=="licit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_illicit <- lexpand(subset(clean_df, prim_sub_licit=="illicit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_comp <- lexpand(subset(clean_df, !grepl("Not", tr_compliance_status)), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_not_comp <- lexpand(subset(clean_df, grepl("Not", tr_compliance_status)),
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))          
              
c_SISTRAT_c1_b <- lexpand( clean_df_b, 
             status = status, 
             birth = birth_date_rec, 
             exit = death_date_rec, entry = disch_date_rec6,
             breaks = list(per = seq(2010, 2021, by = 1), age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
             aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_b_amb <- lexpand(subset(clean_df_b, res_plan==0), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_res <- lexpand(subset(clean_df_b, res_plan==1), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_b_licit <- lexpand(subset(clean_df_b, prim_sub_licit=="licit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_illicit <- lexpand(subset(clean_df_b, prim_sub_licit=="illicit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_b_comp <- lexpand(subset(clean_df_b, !grepl("Not", tr_compliance_status)), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_b_not_comp <- lexpand(subset(clean_df_b, grepl("Not", tr_compliance_status)),
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_rec6,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))



c_SISTRAT_c1_c <- popEpi::lexpand( clean_df_c, 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_c_amb <- lexpand(subset(clean_df_c, res_plan==0), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_res <- lexpand(subset(clean_df_c, res_plan==1), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_c_licit <- lexpand(subset(clean_df_c, prim_sub_licit=="licit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_illicit <- lexpand(subset(clean_df_c, prim_sub_licit=="illicit"), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))

c_SISTRAT_c1_c_comp <- lexpand(subset(clean_df_c, !grepl("Not", tr_compliance_status)), 
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))
c_SISTRAT_c1_c_not_comp <- lexpand(subset(clean_df_c, grepl("Not", tr_compliance_status)),
              status = status, 
              birth = birth_date_rec, 
              exit = death_date_rec, entry = disch_date_corr,
              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
              breaks = list(per = seq(2010, 2021, by = 1), 
              #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
              aggre = list(agegroup = age, year = per, sex= sex_rec))


Chilean population statistics & UN mortality-tables

We imported population projections for individuals aged 18 to 76 covering the years 2010 to 2020. After cleaning and reshaping the data, we grouped it by year, age, and sex. Next, we loaded mortality tables to weight by population—by year, sex, and age group to support our further analyses.

We converted the UN 5-year intervals abridged life-table (cons_rate_sex_5x1) into the reference rates used for the SIR. First, we retrieved each interval’s starting age (start_age) and width (n = 1 year for age 0, 4 years for 1–4, 5 years thereafter). Using \(q_x\) we derived the instantaneous hazard for the interval, λₓ = –log(1 – qₓ)/n, and reconstructed the number of deaths (lx × qₓ) and the corresponding person-years of exposure (deaths / λₓ). We then collapsed the life-table into the study’s four attained-age bands—18-29, 30-44, 45-59, 60-75 years—summing deaths and exposure within each (by calendar year and sex) and finally computed the average population hazard λ = deaths / PY for every sex-year-age-band cell. We did not use the \(m_x\) (crude central death rate) . The resulting table (popmort_lt_cl_banded) contains sex, year, agegroup, and haz (population-weighted hazards), and serves as the external mortality reference in the SIR calculation.

Code
proy_ine_com_18_75<-
  rio::import("https://www.ine.gob.cl/docs/default-source/proyecciones-de-poblacion/cuadros-estadisticos/base-2017/ine_estimaciones-y-proyecciones-2002-2035_base-2017_comunas0381d25bc2224f51b9770a705a434b74.csv?sfvrsn=b6e930a7_3&download=true")|> 
  tidyr::pivot_longer(cols = dplyr::starts_with("Poblacion"), 
                      names_to = "anio", 
                      values_to = "poblacion")|> 
  dplyr::mutate(anio= gsub("Poblacion ","",anio), anio=as.numeric(anio))|> 
  dplyr::filter(anio>=2010 & anio<=2020, Edad>=18 & Edad<76)|> 
  dplyr::mutate(sex_rec= ifelse(`Sexo (1=Hombre 2=Mujer)`==2,"female","male"))|> 
  group_by(anio, Edad, sex_rec)|>
  summarise(pop=sum(poblacion, na.rm=T))|>
  ungroup()|>
  rename("age"="Edad", "year"="anio")

summarise() has grouped output by ‘anio’, ‘Edad’. You can override using the .groups argument.

Code
mx_1x1_banded <- mx_1x1_filt2|>
  mutate(
    year = Year,              # renombramos para que coincida con popmort
    # agegroup = 
      # case_when(         # crea bandas con fcase() de tidytable
      # Age >= 18 & Age <= 29~ "18-29",
      # Age >= 30 & Age <= 44~ "30-44",
      # Age >= 45 & Age <= 59~ "45-59",
      # Age >= 60 & Age <= 76~ "60+",
      # T~ NA_character_
      agegroup = case_when(         # crea bandas con fcase() de tidytable
      Age >= 18 & Age <= 29~ 18,
      Age >= 30 & Age <= 44~ 30,
      Age >= 45 & Age <= 59~ 45,
      Age >= 60 & Age <= 76~ 60,
      T~ NA_real_
    )
  )|> 
  filter(!is.na(agegroup))|>    # descarta edades fuera de 18-65
  left_join(proy_ine_com_18_75, by= c("Year"="year", "sex"="sex_rec", "Age"="age"))|>
  summarise(
    haz = weighted.mean(lambda_p_yrs, w = pop, na.rm = TRUE),   # tasa media del grupo
    mx = weighted.mean(mx, w = pop, na.rm = TRUE),
    .by = c(year, sex, agegroup),
    #.groups = "drop"
  )|>
  arrange(year, sex, agegroup)|> 
  ungroup()|> 
  mutate(sex= ifelse(sex=="female","Female","Male"))

Stantardized mortality rates- Indirect

We then estimated the cohort’s excess mortality with sir(). The function compares the deaths and person-years in the aggregated Lexis file (c_SISTRAT_c1, columns from0to1 and pyrs) with the population hazards prepared above (mx_1x1_banded, column haz). By specifying adjust = c("agegroup", "year", "sex"), the standardized mortality ratio (SIR) is stratified on the same four attained-age bands, calendar year, and sex that define the reference table. The call returns:

  • the overall SIR with profile-likelihood 95 % CI,
  • the expected deaths under population rates,
  • total person-years, and
  • the excess absolute risk (EAR), i.e. additional deaths per 1 000 PY (deaths each 100 patients followed over a 10-year period).

These values quantify both the relative (SIR) and absolute (EAR) mortality burden of the treated SUD cohort versus the Chilean general population.

The single-row dataset (clean_df_corr_surv) parks every patient’s entire follow-up in the calendar year and age they had at admission. All their person-years therefore stay in strata with younger ages and earlier years, where population mortality rates are low, so the expected deaths are artificially small and the SMR inflates. lexpand() creates c_SISTRAT_c1, a Lexis table that splits each record every time the person moves to a new calendar year or ages into a new band (18-29, 30-44, 45-59, 60-75). Person-years are thus re-allocated to the true year-age cells, many of which carry higher background rates; expected deaths rise and the SIR drops. Bottom line: for any SMR/SIR, Poisson or GLM with offset log(PY), always use the time-split Lexis version so that exposure is counted in the correct year and attained-age stratum.

Code
sr_poredadsexanio <- biostat3::survRate(Surv(pyrs, status==1) ~ strata(sex_rec)+ 
                      strata(agegroup)+ strata(year), data= 
                        clean_df_corr_surv|> 
                      mutate(agegroup = cut(
                        disch_age_rec,                   # la variable de edad
                        breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
                        right  = FALSE,                 # intervalo izquierdo cerrado  [15–30)
                        labels = c("18-29", "30-44", "45-59", "60+"),
                        include.lowest = TRUE           # 15 entra en el primer tramo
                      ), 
                      #year= lubridate::year(as.Date(exit_date))
                      year = lubridate::year(as.Date(disch_date_num_rec6))  # USE DISCHARGE YEAR
                      )|> 
                      filter(disch_age_rec>17, disch_age_rec<76))
#this approach for calculating adjusted SIR is not advisable because what I explained in quarto.

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

df_glm <- c_SISTRAT_c1 %>%
  # agegroup to labels
  mutate(
    agegroup = case_when(
      agegroup == 18 ~ "18-29",
      agegroup == 30 ~ "30-44",
      agegroup == 45 ~ "45-59",
      agegroup == 60 ~ "60+"
    ),
    sex = as.character(sex)
  ) %>%
  # Join with reference life-tables
  left_join(
    mx_1x1_banded %>%
      mutate(
        agegroup = case_when(
          agegroup == 18 ~ "18-29",
          agegroup == 30 ~ "30-44",
          agegroup == 45 ~ "45-59",
          agegroup == 60 ~ "60+"
        ),
        sex = as.character(sex)
      ),
    by = c("agegroup", "year", "sex")
  ) %>%
  # Expected events
  mutate(expected = pyrs * haz)


model_poisson <- glm(
    from0to1 ~ 1,
    family = poisson,
    offset = log(expected),
    data = df_glm
)
pearson_chisq <- sum(residuals(model_poisson, type = "pearson")^2)
df_residual <- df.residual(model_poisson)
dispersion_index <- pearson_chisq / df_residual
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
df_glm_b <- c_SISTRAT_c1_b %>%
  # agegroup to labels
  mutate(
    agegroup = case_when(
      agegroup == 18 ~ "18-29",
      agegroup == 30 ~ "30-44",
      agegroup == 45 ~ "45-59",
      agegroup == 60 ~ "60+"
    ),
    sex = as.character(sex)
  ) %>%
  # Join with reference life-tables
  left_join(
    mx_1x1_banded %>%
      mutate(
        agegroup = case_when(
          agegroup == 18 ~ "18-29",
          agegroup == 30 ~ "30-44",
          agegroup == 45 ~ "45-59",
          agegroup == 60 ~ "60+"
        ),
        sex = as.character(sex)
      ),
    by = c("agegroup", "year", "sex")
  ) %>%
  # Expected events
  mutate(expected = pyrs * haz)


model_poisson_b <- glm(
    from0to1 ~ 1,
    family = poisson,
    offset = log(expected),
    data = df_glm_b
)
pearson_chisq_b <- sum(residuals(model_poisson_b, type = "pearson")^2)
df_residual_b <- df.residual(model_poisson_b)
dispersion_index_b <- pearson_chisq_b / df_residual_b
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
df_glm_c <- c_SISTRAT_c1_c %>%
  # agegroup to labels
  mutate(
    agegroup = case_when(
      agegroup == 18 ~ "18-29",
      agegroup == 30 ~ "30-44",
      agegroup == 45 ~ "45-59",
      agegroup == 60 ~ "60+"
    ),
    sex = as.character(sex)
  ) %>%
  # Join with reference life-tables
  left_join(
    mx_1x1_banded %>%
      mutate(
        agegroup = case_when(
          agegroup == 18 ~ "18-29",
          agegroup == 30 ~ "30-44",
          agegroup == 45 ~ "45-59",
          agegroup == 60 ~ "60+"
        ),
        sex = as.character(sex)
      ),
    by = c("agegroup", "year", "sex")
  ) %>%
  # Expected events
  mutate(expected = pyrs * haz)


model_poisson_c <- glm(
    from0to1 ~ 1,
    family = poisson,
    offset = log(expected),
    data = df_glm_c
)
pearson_chisq_c <- sum(residuals(model_poisson_c, type = "pearson")^2)
df_residual_c <- df.residual(model_poisson_c)
dispersion_index_c <- pearson_chisq_c / df_residual_c
Code
#https://bendixcarstensen.com/Epi/flup.pdf
#https://onlinelibrary.wiley.com/doi/full/10.1002%2Fijc.34973
sir_tot<- popEpi::sir( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot

c_SISTRAT_c1_fot <- lexpand( clean_df, 
                             status = status, 
                             birth = birth_date_rec, 
                             exit = death_date_rec, entry = disch_date_rec6,
                             #fot=0:10,
                             #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                             breaks = list(per = seq(2010, 2021, by = 1), 
                                           #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                           age = c(18, 30, 45, 60, 76), fot = c(0, .0386,.2465, .5, 1, 3, 5, 7, 9, Inf)),
                             aggre = list(agegroup = age, year = per, sex= sex_rec, fot= fot) )
sir_tot_fot<- popEpi::sir( coh.data = c_SISTRAT_c1_fot, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
                           ref.data = mx_1x1_banded, 
                           ref.rate = 'haz', 
                           print="fot",
                           adjust = c('agegroup','year','sex'), 
                           EAR=T)#Excess Absolute Risks
sir_tot_fot

sr_1_sex_fot <- popEpi::sir(c_SISTRAT_c1_fot, coh.obs = 'from0to1',
              coh.pyrs = 'pyrs',
              ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
              ref.rate = haz,
              print = c("sex", "fot"),
              adjust = c("agegroup", "sex", "year"),
              test.type = "homogeneity",
              conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
              conf.level = 0.95, EAR = T)

sr_1_sex <- popEpi::sir(c_SISTRAT_c1, coh.obs = 'from0to1',
              coh.pyrs = 'pyrs',
              ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
              ref.rate = haz,
              print = c("sex"),
              adjust = c("agegroup", "sex", "year"),
              test.type = "homogeneity",
              conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
              conf.level = 0.95, EAR = T)
sr_1_age <- popEpi::sir(c_SISTRAT_c1, coh.obs = 'from0to1',
              coh.pyrs = 'pyrs',
              ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
              ref.rate = haz,
              print = c("agegroup"),
              adjust = c("agegroup", "sex", "year"),
              test.type = "homogeneity",
              conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
              conf.level = 0.95, EAR = T)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_df<- 
cbind.data.frame(
total= "Overall",
observed= round(sir_tot$observed,0),
pyrs= round(sir_tot$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot$observed, sir_tot$pyrs, phi=1))))),
expected= round(sir_tot$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot, phi=dispersion_index)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot$EAR)), 
phi=dispersion_index)

# df_glm <- c_SISTRAT_c1 %>%
#     dplyr::left_join(mx_1x1_banded, by = c("agegroup", "year", "sex")) %>%
#     dplyr::mutate(expected = pyrs * haz)

# Run analysis
sr_1_sex_df <- sir_cmr_subgroup(
    df = df_glm,
    group_var = "sex"
) 
sr_1_sex_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_sex$observed, sr_1_sex$pyrs, phi=1))))


sr_1_age_df <- sir_cmr_subgroup(
    df = df_glm,
    group_var = "agegroup"
)
sr_1_age_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_age$observed, sr_1_age$pyrs, phi=1))))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

cat("Now by strata")

sir_tot_amb<- popEpi::sir( coh.data = c_SISTRAT_c1_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_amb_df<- 
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_amb$observed,0),
pyrs= round(sir_tot_amb$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_amb$observed, sir_tot_amb$pyrs, phi= 1))))),
expected= round(sir_tot_amb$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_amb, phi= extract_phi(c_SISTRAT_c1_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_amb$EAR)), 
phi=extract_phi(c_SISTRAT_c1_amb))


sir_tot_res<- popEpi::sir( coh.data = c_SISTRAT_c1_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_res_df<- 
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_res$observed,0),
pyrs= round(sir_tot_res$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_res$observed, sir_tot_res$pyrs, phi= 1))))),
expected= round(sir_tot_res$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_res, phi= extract_phi(c_SISTRAT_c1_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_res$EAR)), 
phi=extract_phi(c_SISTRAT_c1_res))


sir_tot_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_illicit_df<- 
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_illicit$observed,0),
pyrs= round(sir_tot_illicit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_illicit$observed, sir_tot_illicit$pyrs, phi= 1))))),
expected= round(sir_tot_illicit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_illicit, phi= extract_phi(c_SISTRAT_c1_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_illicit$EAR)), 
phi=extract_phi(c_SISTRAT_c1_illicit))



sir_tot_licit<- popEpi::sir( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_licit_df<- 
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit$observed,0),
pyrs= round(sir_tot_licit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit$observed, sir_tot_licit$pyrs, phi= 1))))),
expected= round(sir_tot_licit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit, phi= extract_phi(c_SISTRAT_c1_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit$EAR)), 
phi=extract_phi(c_SISTRAT_c1_licit))


sir_tot_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_comp_df<- 
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_comp$observed,0),
pyrs= round(sir_tot_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_comp$observed, sir_tot_comp$pyrs, phi= 1))))),
expected= round(sir_tot_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_comp, phi= extract_phi(c_SISTRAT_c1_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_comp$EAR)), 
phi=extract_phi(c_SISTRAT_c1_comp))


sir_tot_not_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_not_comp_df<- 
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_not_comp$observed,0),
pyrs= round(sir_tot_not_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_not_comp$observed, sir_tot_not_comp$pyrs, phi= 1))))),
expected= round(sir_tot_not_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_not_comp, phi= extract_phi(c_SISTRAT_c1_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_not_comp$EAR)), 
phi=extract_phi(c_SISTRAT_c1_not_comp))
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile) 

 Total sir: 3.59 (3.46-3.72)
 Total observed: 2996
 Total expected: 834.72
 Total person-years: 353826 


   observed expected   pyrs   sir sir.lo sir.hi p_value   EAR
      <num>    <num>  <num> <num>  <num>  <num>   <num> <num>
1:     2996   834.72 353826  3.59   3.46   3.72       0 6.108
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile) 
Test for homogeneity: p < 0.001 

 Total sir: 3.59 (3.46-3.72)
 Total observed: 2996
 Total expected: 834.72
 Total person-years: 353826 


Clave <fot>
      fot observed expected      pyrs   sir sir.lo sir.hi p_value    EAR
    <num>    <num>    <num>     <num> <num>  <num>  <num>   <num>  <num>
1: 0.0000       49     5.66   2700.92  8.66   6.46  11.32       0 16.047
2: 0.0386      135    30.31  14408.71  4.45   3.74   5.25       0  7.266
3: 0.2465      135    36.67  17287.71  3.68   3.09   4.34       0  5.688
4: 0.5000      239    71.96  33364.72  3.32   2.92   3.76       0  5.006
5: 1.0000     1028   264.11 116421.81  3.89   3.66   4.14       0  6.561
6: 3.0000      696   205.71  84817.22  3.38   3.14   3.64       0  5.780
7: 5.0000      452   133.75  52809.05  3.38   3.08   3.70       0  6.026
8: 7.0000      209    69.15  26021.20  3.02   2.63   3.45       0  5.374
9: 9.0000       53    17.40   5994.64  3.05   2.30   3.94       0  5.939
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_df, arrange(sr_1_sex_df,desc(sex)), sr_1_age_df, sir_tot_amb_df, sir_tot_res_df, sir_tot_illicit_df, sir_tot_licit_df, sir_tot_comp_df, sir_tot_not_comp_df)|> 
  rename("Characteristic"="total")|>
  mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
                                   is.na(Characteristic)& sex=="Male"~"Male",
                                   is.na(Characteristic)& grepl("18",agegroup)~"18-29",
                                   is.na(Characteristic)& grepl("30",agegroup)~"30-44",
                                   is.na(Characteristic)& grepl("45",agegroup)~"45-59",
                                   is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
                                   ))|> 
    (\(df) {
    df->> df_smr_ind
    df
  })()|> 
  dplyr::select(-sex, -agegroup)|> 
    extract(
      SMR,
      into   = c("est", "low", "high"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                 SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
  dplyr::select(-est, -low, -high)|>  
  knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group")
Dispersion-corrected 95% confidence intervals
All-cause SMRs for patients who accessed SUD treatment by sex and age group
Characteristic observed pyrs CMR_1000 expected EAR phi SMR_dir
Overall 2996 353826 8.5 (8.2–8.8) 835 6.11 3.1764435 3.59 (3.37–3.83)
Male 2396 268325 8.9 (8.6–9.3) 725 6.23 1.6149215 3.30 (3.14–3.48)
Female 600 85501 7.0 (6.5–7.6) 110 5.73 1.3469335 5.47 (4.98–6.00)
18-29 224 77125 2.9 (2.5–3.3) 65 2.06 3.1091828 3.44 (2.73–4.33)
30-44 1070 183191 5.8 (5.5–6.2) 276 4.33 5.6933881 3.88 (3.36–4.47)
45-59 1343 82119 16.4 (15.5–17.3) 374 11.80 2.0250692 3.59 (3.33–3.87)
60+ 359 11391 31.5 (28.4–34.9) 119 21.04 1.3781331 3.01 (2.67–3.40)
Ambulatory 2488 302963 8.2 (7.9–8.5) 728 5.81 1.1859841 3.42 (3.27–3.57)
Residential 508 50863 10.0 (9.2–10.9) 107 7.89 0.8515432 4.76 (4.39–5.15)
Illicit 1254 246046 5.1 (4.8–5.4) 467 3.20 1.7595626 2.68 (2.49–2.89)
Licit 1742 107780 16.2 (15.4–16.9) 367 12.76 1.0611805 4.74 (4.52–4.98)
Completed 671 83248 8.1 (7.5–8.7) 240 5.18 0.8834808 2.80 (2.60–3.00)
Not completed 2325 270578 8.6 (8.3–8.9) 595 6.39 1.1754916 3.91 (3.74–4.09)
Code
cat("Differences by sex in unadjusted CMRs (not phi inflated)\n")
rbind.data.frame(
cbind.data.frame(cat= "Sex (ref= female)", IRR= fisher.test(matrix(c(2396,600,268325,85501),2,2))$estimate[[1]], lower= fisher.test(matrix(c(2396,600,268325,85501),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(2396,600,268325,85501),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Tr. setting (ref=not completed)", IRR= fisher.test(matrix(c(2488,508,302963,50863),2,2))$estimate[[1]], lower= fisher.test(matrix(c(2488,508,302963,50863),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(2488,508,302963,50863),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Tr. compliance (ref= residential)", IRR= fisher.test(matrix(c(671,2325,83248,270578),2,2))$estimate[[1]], lower= fisher.test(matrix(c(671,2325,83248,270578),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(671,2325,83248,270578),2,2))$conf.int[[2]]),
cbind.data.frame(cat= "Primary subs.(ref= licit)", IRR= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$estimate[[1]], lower= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$conf.int[[1]], upper= fisher.test(matrix(c(1254,1742,246046,107780),2,2))$conf.int[[2]])
) |> knitr::kable("markdown", caption="Exact test for IRRs (crude CMRs)", digits=2)
Differences by sex in unadjusted CMRs (not phi inflated)
Exact test for IRRs (crude CMRs)
cat IRR lower upper
Sex (ref= female) 1.27 1.16 1.39
Tr. setting (ref=not completed) 0.82 0.75 0.91
Tr. compliance (ref= residential) 0.94 0.86 1.02
Primary subs.(ref= licit) 0.32 0.29 0.34

We replicated the analysis for the last available treatment.

Code
sir_tot_b<- 
sir( coh.data = c_SISTRAT_c1_b, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
     ref.data = mx_1x1_banded, 
     ref.rate = 'haz', 
     adjust = c('agegroup','year','sex'), 
     EAR=T)#Excess Absolute Risks
sir_tot_b
sr_1_sex_b <- popEpi::sir(c_SISTRAT_c1_b, coh.obs = 'from0to1',
                  coh.pyrs = 'pyrs',
                  ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
                  ref.rate = haz,
                  print = c("sex"),
                  adjust = c("agegroup", "sex", "year"),
                  test.type = "homogeneity",
                  conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
                  conf.level = 0.95, EAR = T)
sr_1_age_b <- popEpi::sir(c_SISTRAT_c1_b, coh.obs = 'from0to1',
                    coh.pyrs = 'pyrs',
                    ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
                    ref.rate = haz,
                    print = c("agegroup"),
                    adjust = c("agegroup", "sex", "year"),
                    test.type = "homogeneity",
                    conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
                    conf.level = 0.95, EAR = T)


#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_b_df<- 
cbind.data.frame(
total= "Overall",
observed= round(sir_tot_b$observed,0),
pyrs= round(sir_tot_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b$observed, sir_tot_b$pyrs, phi=1))))),
expected= round(sir_tot_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b, phi=dispersion_index_b)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b$EAR)),
phi= dispersion_index_b)


# Run analysis
sr_1_sex_b_df <- sir_cmr_subgroup(
    df = df_glm_b,
    group_var = "sex"
)
sr_1_sex_b_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_sex_b$observed, sr_1_sex_b$pyrs, phi=1))))
sr_1_age_b_df <- sir_cmr_subgroup(
    df = df_glm_b,
    group_var = "agegroup"
)
sr_1_age_b_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_age_b$observed, sr_1_age_b$pyrs, phi=1))))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

cat("Now by strata")

sir_tot_amb_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_amb_b_df<- 
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_amb_b$observed,0),
pyrs= round(sir_tot_amb_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_amb_b$observed, sir_tot_amb_b$pyrs, phi= 1))))),
expected= round(sir_tot_amb_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_amb_b, phi= extract_phi(c_SISTRAT_c1_b_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_amb_b$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_amb))


sir_tot_res_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_res_b_df<- 
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_res_b$observed,0),
pyrs= round(sir_tot_res_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_res_b$observed, sir_tot_res_b$pyrs, phi= 1))))),
expected= round(sir_tot_res_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_res_b, phi= extract_phi(c_SISTRAT_c1_b_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_res_b$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_res))

sir_tot_b_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_b_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_illicit_b_df<- 
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_b_illicit$observed,0),
pyrs= round(sir_tot_b_illicit$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_illicit$observed, sir_tot_b_illicit$pyrs, phi= 1))))),
expected= round(sir_tot_b_illicit$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_illicit, phi= extract_phi(c_SISTRAT_c1_b_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_illicit$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_illicit))


sir_tot_licit_b<- popEpi::sir( coh.data = c_SISTRAT_c1_b_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_licit_b_df<- 
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit_b$observed,0),
pyrs= round(sir_tot_licit_b$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit_b$observed, sir_tot_licit_b$pyrs, phi= 1))))),
expected= round(sir_tot_licit_b$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit_b, phi= extract_phi(c_SISTRAT_c1_b_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit_b$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_licit))


sir_tot_b_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_b_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_comp_b_df<- 
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_b_comp$observed,0),
pyrs= round(sir_tot_b_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_comp$observed, sir_tot_b_comp$pyrs, phi= 1))))),
expected= round(sir_tot_b_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_comp, phi= extract_phi(c_SISTRAT_c1_b_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_comp$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_comp))


sir_tot_b_not_comp<- popEpi::sir( coh.data = c_SISTRAT_c1_b_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_not_comp_b_df<- 
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_b_not_comp$observed,0),
pyrs= round(sir_tot_b_not_comp$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_b_not_comp$observed, sir_tot_b_not_comp$pyrs, phi= 1))))),
expected= round(sir_tot_b_not_comp$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_b_not_comp, phi= extract_phi(c_SISTRAT_c1_b_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_b_not_comp$EAR)), 
phi=extract_phi(c_SISTRAT_c1_b_not_comp))
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile) 

 Total sir: 3.96 (3.82-4.1)
 Total observed: 3029
 Total expected: 765.8
 Total person-years: 317628 


   observed expected     pyrs   sir sir.lo sir.hi p_value   EAR
      <num>    <num>    <num> <num>  <num>  <num>   <num> <num>
1:     3029    765.8 317627.6  3.96   3.82    4.1       0 7.125
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_b_df, arrange(sr_1_sex_b_df,desc(sex)), sr_1_age_b_df, sir_tot_amb_b_df, sir_tot_res_b_df, sir_tot_illicit_b_df, sir_tot_licit_b_df, sir_tot_comp_b_df, sir_tot_not_comp_b_df)|> 
  rename("Characteristic"="total")|>
  mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
                                   is.na(Characteristic)& sex=="Male"~"Male",
                                   is.na(Characteristic)& grepl("18",agegroup)~"18-29",
                                   is.na(Characteristic)& grepl("30",agegroup)~"30-44",
                                   is.na(Characteristic)& grepl("45",agegroup)~"45-59",
                                   is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
                                   ))|> 
  dplyr::select(-sex, -agegroup)|> 
  (\(df) {
    df->> df_smr_ind_b
    df
  })()|> 
    extract(
      SMR,
      into   = c("est", "low", "high"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
  dplyr::select(-est, -low, -high)|>    
  knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group, last treatment available")
Dispersion-corrected 95% confidence intervals
All-cause SMRs for patients who accessed SUD treatment by sex and age group, last treatment available
Characteristic observed pyrs CMR_1000 expected EAR phi SMR_dir
Overall 3029 317628 9.5 (9.2–9.9) 766 7.12 3.8986658 3.96 (3.69–4.24)
Male 2425 242583 10.0 (9.6–10.4) 666 7.25 2.5908006 3.64 (3.42–3.88)
Female 604 75044 8.0 (7.4–8.7) 100 6.71 1.7020223 6.03 (5.43–6.69)
18-29 222 66757 3.3 (2.9–3.8) 57 2.47 3.6155528 3.90 (3.04–5.01)
30-44 1097 163996 6.7 (6.3–7.1) 248 5.18 6.4658487 4.42 (3.80–5.14)
45-59 1351 75900 17.8 (16.9–18.8) 346 13.24 2.6600062 3.90 (3.58–4.26)
60+ 359 10975 32.7 (29.5–36.3) 115 22.26 1.4270522 3.13 (2.77–3.54)
Ambulatory 2452 268495 9.1 (8.8–9.5) 661 6.67 1.1849895 3.71 (3.55–3.87)
Residential 577 49132 11.7 (10.8–12.7) 105 9.61 0.7615739 5.49 (5.11–5.90)
Illicit 1242 218996 5.7 (5.4–6.0) 425 3.73 1.7017815 2.93 (2.72–3.15)
Licit 1787 98631 18.1 (17.3–19.0) 341 14.66 1.2348696 5.24 (4.97–5.51)
Completed 716 81226 8.8 (8.2–9.5) 237 5.90 0.6639505 3.02 (2.85–3.21)
Not completed 2313 236402 9.8 (9.4–10.2) 529 7.55 1.2548334 4.37 (4.18–4.58)

We replicated the analysis for patients in unfinished treatments

Code
sir_tot_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_c
sr_1_c_sex <- popEpi::sir(c_SISTRAT_c1_c, coh.obs = 'from0to1',
              coh.pyrs = 'pyrs',
              ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
              ref.rate = haz,
              print = c("sex"),
              adjust = c("agegroup", "sex", "year"),
              test.type = "homogeneity",
              conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
              conf.level = 0.95, EAR = T)
sr_1_c_age <- popEpi::sir(c_SISTRAT_c1_c, coh.obs = 'from0to1',
              coh.pyrs = 'pyrs',
              ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
              ref.rate = haz,
              print = c("agegroup"),
              adjust = c("agegroup", "sex", "year"),
              test.type = "homogeneity",
              conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
              conf.level = 0.95, EAR = T)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
sr_tot_c_df<- 
cbind.data.frame(
total= "Overall",
observed= round(sir_tot_c$observed,0),
pyrs= round(sir_tot_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c$observed, sir_tot_c$pyrs, phi=1))))),
expected= round(sir_tot_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c, phi=dispersion_index_c)[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c$EAR)), 
phi=dispersion_index_c)

# Run analysis
sr_1_sex_c_df <- sir_cmr_subgroup(
    df = df_glm_c,
    group_var = "sex"
) 
sr_1_sex_c_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_c_sex$observed, sr_1_c_sex$pyrs, phi=1))))


sr_1_age_c_df <- sir_cmr_subgroup(
    df = df_glm_c,
    group_var = "agegroup"
)
sr_1_age_c_df$CMR_1000<-do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(cmr_ci_phi(sr_1_c_age$observed, sr_1_c_age$pyrs, phi=1))))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

cat("Now by strata")

sir_tot_c_amb<- popEpi::sir( coh.data = c_SISTRAT_c1_c_amb, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_amb_c_df<- 
cbind.data.frame(
total= "Ambulatory",
observed= round(sir_tot_c_amb$observed,0),
pyrs= round(sir_tot_c_amb$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c_amb$observed, sir_tot_c_amb$pyrs, phi= 1))))),
expected= round(sir_tot_c_amb$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c_amb, phi= extract_phi(c_SISTRAT_c1_c_amb))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c_amb$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_amb))


sir_tot_c_res<- popEpi::sir( coh.data = c_SISTRAT_c1_c_res, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_res_c_df<- 
cbind.data.frame(
total= "Residential",
observed= round(sir_tot_c_res$observed,0),
pyrs= round(sir_tot_c_res$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_c_res$observed, sir_tot_c_res$pyrs, phi= 1))))),
expected= round(sir_tot_c_res$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_c_res, phi= extract_phi(c_SISTRAT_c1_c_res))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_c_res$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_res))


sir_tot_illicit_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_illicit_c_df<- 
cbind.data.frame(
total= "Illicit",
observed= round(sir_tot_illicit_c$observed,0),
pyrs= round(sir_tot_illicit_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_illicit_c$observed, sir_tot_illicit_c$pyrs, phi= 1))))),
expected= round(sir_tot_illicit_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_illicit_c, phi= extract_phi(c_SISTRAT_c1_c_illicit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_illicit_c$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_illicit))


sir_tot_licit_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_licit_c_df<- 
cbind.data.frame(
total= "Licit",
observed= round(sir_tot_licit_c$observed,0),
pyrs= round(sir_tot_licit_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_licit_c$observed, sir_tot_licit_c$pyrs, phi= 1))))),
expected= round(sir_tot_licit_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_licit_c, phi= extract_phi(c_SISTRAT_c1_c_licit))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_licit_c$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_licit))


sir_tot_comp_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_comp_c_df<- 
cbind.data.frame(
total= "Completed",
observed= round(sir_tot_comp_c$observed,0),
pyrs= round(sir_tot_comp_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_comp_c$observed, sir_tot_comp_c$pyrs, phi= 1))))),
expected= round(sir_tot_comp_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_comp_c, phi= extract_phi(c_SISTRAT_c1_c_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_comp_c$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_comp))


sir_tot_not_comp_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_not_comp, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
           ref.data = mx_1x1_banded, 
           ref.rate = 'haz', 
           adjust = c('agegroup','year','sex'), 
           EAR=T)#Excess Absolute Risks
sir_tot_not_comp_c_df<- 
cbind.data.frame(
total= "Not completed",
observed= round(sir_tot_not_comp_c$observed,0),
pyrs= round(sir_tot_not_comp_c$pyrs,0),
CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sir_tot_not_comp_c$observed, sir_tot_not_comp_c$pyrs, phi= 1))))),
expected= round(sir_tot_not_comp_c$expected,0),
SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sir_tot_not_comp_c, phi= extract_phi(c_SISTRAT_c1_c_not_comp))[ , 1:3])))),
EAR= as.character(sprintf("%.2f",sir_tot_not_comp_c$EAR)), 
phi=extract_phi(c_SISTRAT_c1_c_not_comp))
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile) 

 Total sir: 3.93 (3.81-4.06)
 Total observed: 3817
 Total expected: 970.26
 Total person-years: 416286 


   observed expected     pyrs   sir sir.lo sir.hi p_value   EAR
      <num>    <num>    <num> <num>  <num>  <num>   <num> <num>
1:     3817   970.26 416285.6  3.93   3.81   4.06       0 6.838
Now by strata
Code
cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sr_tot_c_df, arrange(sr_1_sex_c_df,desc(sex)), sr_1_age_c_df, sir_tot_amb_c_df, sir_tot_res_c_df, sir_tot_illicit_c_df, sir_tot_licit_c_df, sir_tot_comp_c_df, sir_tot_not_comp_c_df)|> 
  rename("Characteristic"="total")|>
  mutate(Characteristic= case_when(is.na(Characteristic)& sex=="Female"~"Female",
                                   is.na(Characteristic)& sex=="Male"~"Male",
                                   is.na(Characteristic)& grepl("18",agegroup)~"18-29",
                                   is.na(Characteristic)& grepl("30",agegroup)~"30-44",
                                   is.na(Characteristic)& grepl("45",agegroup)~"45-59",
                                   is.na(Characteristic)& grepl("60",agegroup)~"60+",T~Characteristic
                                   ))|> 
    (\(df) {
    df->> df_smr_ind_c
    df
  })()|> 
  dplyr::select(-sex, -agegroup)|> 
    extract(
      SMR,
      into   = c("est", "low", "high"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
  dplyr::select(-est, -low, -high)|>  
  knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. Unfinished treatments")
Dispersion-corrected 95% confidence intervals
All-cause SMRs for patients who accessed SUD treatment by sex and age group. Unfinished treatments
Characteristic observed pyrs CMR_1000 expected EAR phi SMR_dir
Overall 3817 416286 9.2 (8.9–9.5) 970 6.84 3.4353379 3.93 (3.71–4.17)
Male 3012 310088 9.7 (9.4–10.1) 833 7.03 1.4783122 3.62 (3.46–3.78)
Female 805 106197 7.6 (7.1–8.1) 137 6.29 1.4002056 5.86 (5.40–6.36)
18-29 283 91929 3.1 (2.7–3.5) 77 2.24 3.0062893 3.67 (3.00–4.49)
30-44 1360 215034 6.3 (6.0–6.7) 321 4.83 5.9053667 4.23 (3.72–4.81)
45-59 1710 96010 17.8 (17.0–18.7) 434 13.29 3.2505793 3.94 (3.62–4.29)
60+ 464 13312 34.9 (31.8–38.2) 138 24.49 1.0085170 3.36 (3.07–3.68)
Ambulatory 3163 355445 8.9 (8.6–9.2) 842 6.53 1.0588197 3.75 (3.62–3.89)
Residential 654 60841 10.7 (10.0–11.6) 128 8.65 0.8869265 5.12 (4.76–5.50)
Illicit 1577 291633 5.4 (5.1–5.7) 550 3.52 1.6430539 2.87 (2.69–3.06)
Licit 2240 124653 18.0 (17.2–18.7) 421 14.60 0.9672190 5.32 (5.11–5.55)
Completed 1341 145632 9.2 (8.7–9.7) 375 6.63 0.7494584 3.57 (3.41–3.74)
Not completed 2476 270654 9.1 (8.8–9.5) 595 6.95 1.0806766 4.16 (3.99–4.34)
Code
# ==============================================================
#  Aggregated Sir-Spline optimiser           (for c_SISTRAT_c1)
# ==============================================================

sirspline_agg <- function(
        data,                         # cohort data, already aggregated
        obs.var   = "observed",       # column with case counts
        pyrs.var  = "pyrs",           # column with person-years
        ref,                          # data.frame with expected rates
        rate.var  = "haz",            # column in ref containing rate
        by.vars   = c("agegroup","year","sex"),   # merge variables
        spline.vars = c("agegroup","year"),       # timescales to spline
        df.grid   = list(agegroup = 2:5,          # candidate df
                         year      = 2:5),
        interaction = FALSE,          # include ns(age) : ns(year) ?
        adjust      = NULL,           # linear covariates
        strata      = NULL,           # factor strata
        knot.weight = "pyrs",         # "pyrs" (=weighted) or "equal"
        family      = poisson,
        control     = glm.control(maxit = 100),
        verbose     = TRUE) {
    
    stopifnot(requireNamespace("splines", quietly = TRUE))
    
    # -------- 0  harmonise column names ----------------------------------
    names(data)[ match(obs.var,  names(data)) ] <- "OBS"
    names(data)[ match(pyrs.var, names(data)) ] <- "PYRS"
    names(ref )[ match(rate.var, names(ref )) ] <- "RATE"
    
    # -------- 1  merge expected rates & build expected -------------------
    dat <- merge(data, ref, by = by.vars, all.x = TRUE)
    dat$EXPECTED <- with(dat, PYRS * RATE)
    dat <- dat[ complete.cases(dat$EXPECTED) & dat$EXPECTED > 0 , ]
    
    if(!nrow(dat)) stop("All rows removed – no positive expected values.")
    
    # make sure spline variables are numeric
    for(v in spline.vars) dat[[v]] <- as.numeric(as.character(dat[[v]]))
    
    # -------- 2  build grid of df combinations ---------------------------
    df.df <- expand.grid(df.grid, KEEP.OUT.ATTRS = FALSE)
    names(df.df) <- spline.vars
    
    # -------- 3  storage --------------------------------------------------
    fits   <- vector("list", nrow(df.df))
    stats  <- data.frame()
    
    # -------- 4  loop over grid ------------------------------------------
    for(i in seq_len(nrow(df.df))) {
        dfs <- unlist(df.df[i, ], use.names = TRUE)
        
        # ---- 4·1 spline terms ---------------------------------------------
        splTerms <- mapply(function(v, k) {
            x <- dat[[v]]
            
            q <- if(knot.weight == "pyrs") {
                # person-time weighted quantiles
                rep(x, times = round(dat$PYRS))      # integer weights OK
            } else x
            
            knots <- stats::quantile(q,
                                     probs = seq(0,1,length.out = k+1L),
                                     type  = 1, na.rm = TRUE)
            knots <- unique(knots)
            bd    <- range(knots);   inKnot <- setdiff(knots, bd)
            
            if(length(inKnot))
                sprintf("splines::ns(%s, knots=c(%s), Boundary.knots=c(%s))",
                        v, paste(inKnot, collapse=","), paste(bd,collapse=","))
            else
                sprintf("splines::ns(%s, Boundary.knots=c(%s))",
                        v, paste(bd, collapse=","))
        }, v = spline.vars, k = dfs, SIMPLIFY = TRUE, USE.NAMES = FALSE)
        
        rhs <- c(adjust, splTerms)
        
        if(interaction && length(spline.vars) == 2)
            rhs <- c(rhs,
                     sprintf("(%s):(%s)", splTerms[1], splTerms[2]))
        
        if(length(strata))
            rhs <- c(rhs, sprintf("factor(%s)", strata))
        
        frm <- reformulate(rhs, response = "OBS")
        
        # ---- 4·2 fit -------------------------------------------------------
        fit <- tryCatch(
            glm(frm, data = dat,
                offset = log(EXPECTED),
                family = family,
                control = control),
            error   = identity,
            warning = identity)
        
        ok <- inherits(fit,"glm") && fit$converged &&
            all(is.finite(coef(fit)))
        
        if(ok) {
            fits[[i]] <- fit
            stats <- rbind(stats,
                           data.frame(row      = i,
                                      df_combo = paste(dfs, collapse=","),
                                      AIC      = stats::AIC(fit),
                                      BIC      = stats::BIC(fit),
                                      n_par    = length(coef(fit)),
                                      warning  = "",
                                      stringsAsFactors = FALSE))
        } else {
            stats <- rbind(stats,
                           data.frame(row      = i,
                                      df_combo = paste(dfs, collapse=","),
                                      AIC      = NA, BIC = NA,
                                      n_par    = NA,
                                      warning  = conditionMessage(fit),
                                      stringsAsFactors = FALSE))
            if(verbose)
                message("skip ", i, ": ", conditionMessage(fit))
        }
    }
    
    stats <- stats[order(stats$AIC), ]
    out   <- list(table = stats, models = fits)
    class(out) <- "sirAgg"
    out
}

print.sirAgg <- function(x, n = 10, ...) {
    cat("Top models by AIC\n")
    print(utils::head(x$table, n = n), row.names = FALSE)
    invisible(x)
}

sirspline_AIC_optimizer <- function(coh.data, coh.obs, coh.pyrs, ref.data, ref.rate, 
                                    spline, knots_list, adjust = NULL) {
  
  # Convert to data frames
  coh.data <- as.data.frame(coh.data)
  ref.data <- as.data.frame(ref.data)
  
  # Remove spline variables from adjust if present
  if (!is.null(adjust)) {
    adjust <- setdiff(adjust, spline)
    if (length(adjust) == 0) adjust <- NULL
  }
  
  # Identify all grouping variables
  grouping_vars <- unique(c(spline, adjust))
  
  # 1. Aggregate cohort data
  agg_fun <- function(df) {
    aggregate(list(observed = df[[coh.obs]], 
                   pyrs = df[[coh.pyrs]]),
              by = df[grouping_vars],
              FUN = sum, na.rm = TRUE)
  }
  coh_agg <- agg_fun(coh.data)
  
  # 2. Merge with reference data
  data <- merge(coh_agg, ref.data, by = grouping_vars)
  
  # 3. Calculate expected cases
  data$expected <- data$pyrs * data[[ref.rate]]
  
  # 4. Clean data - remove rows with missing values
  complete_cases <- complete.cases(data[, c(grouping_vars, "observed", "expected")])
  data <- data[complete_cases, ]
  
  # 5. Convert spline variables to numeric
  for (var in spline) {
    data[[var]] <- as.numeric(as.character(data[[var]]))
  }
  
  # 6. Prepare results storage
  results <- data.frame()
  
  # 7. Fit models for each knot combination
  for (i in seq_along(knots_list)) {
    k_counts <- knots_list[[i]]
    names(k_counts) <- spline  # Ensure names match spline variables
    
    # Skip invalid combinations
    if (any(k_counts < 2)) {
      message("Skipping combination ", i, " (", paste(k_counts, collapse = ","), 
              ") - knots must be >=2")
      next
    }
    
    # Create spline terms
    spline_terms <- sapply(spline, function(var) {
      # Calculate knots
      knots <- quantile(data[[var]], probs = seq(0, 1, length.out = k_counts[var]), 
                        na.rm = TRUE)
      
      # Boundary and internal knots
      boundary <- knots[c(1, length(knots))]
      internal <- knots[-c(1, length(knots))]
      
      # Create spline term
      if (length(internal) == 0) {
        paste0("splines::ns(", var, ", Boundary.knots = c(", 
               paste(boundary, collapse = ","), "))")
      } else {
        paste0("splines::ns(", var, ", knots = c(", 
               paste(internal, collapse = ","), "), Boundary.knots = c(", 
               paste(boundary, collapse = ","), "))")
      }
    })
    
    # Create formula
    rhs_terms <- c(adjust, spline_terms)
    rhs <- paste(rhs_terms, collapse = " + ")
    formula <- as.formula(paste("observed ~", rhs))
    
    # Fit model
    model <- tryCatch({
      glm(formula, 
          data = data, 
          offset = log(expected),
          family = poisson)
    }, error = function(e) {
      message("Model failed for combination ", i, " (", paste(k_counts, collapse = ","), 
              "): ", e$message)
      return(NULL)
    })
    
    # Skip if model failed
    if (is.null(model)) next
    
    # Store results
    results <- rbind(results, data.frame(
      combination = i,
      knots = paste(k_counts, collapse = ","),
      AIC = AIC(model),
      stringsAsFactors = FALSE
    ))
  }
  
  # Return results sorted by AIC
  if (nrow(results) > 0) {
    results[order(results$AIC), ]
  } else {
    message("No valid models were fitted")
    results
  }
}

# Define knot combinations to test
knots_combinations <- list(
  c(agegroup = 1, year = 2),
  c(agegroup = 1, year = 3),
  c(agegroup = 1, year = 4),
  c(agegroup = 1, year = 5),
  c(agegroup = 2, year = 2),
  c(agegroup = 2, year = 3),
  c(agegroup = 2, year = 4),
  c(agegroup = 2, year = 5),
  c(agegroup = 3, year = 3),
  c(agegroup = 3, year = 4),
  c(agegroup = 4, year = 3),
  c(agegroup = 4, year = 4),
  c(agegroup = 5, year = 4),
  c(agegroup = 5, year = 3),
  c(agegroup = 5, year = 2),
  c(agegroup = 5, year = 1)
)

# Run the function (remove spline variables from adjust)
aic_results <- sirspline_AIC_optimizer(
  coh.data  = c_SISTRAT_c1,
  coh.obs   = "from0to1",
  coh.pyrs  = "pyrs",
  ref.data  = mx_1x1_banded,
  ref.rate  = "haz",
  spline    = c("agegroup", "year"),
  knots_list = knots_combinations,
  adjust    = "sex"  # Only non-spline variables
)

# Print results
print(aic_results)


# 2. Call the optimiser
opt_sirspline <- sirspline_agg(
    data       = c_SISTRAT_c1,
    obs.var    = "from0to1",
    pyrs.var   = "pyrs",
    ref        = mx_1x1_banded,
    rate.var   = "haz",
    by.vars    = c("agegroup","year","sex"),
    spline.vars= c("agegroup","year"),
    df.grid    = list(agegroup = 1:5, year = 1:5),
    interaction= TRUE,      # set FALSE if you only want additive terms
    adjust     = c("sex"),      # e.g. "sex" if you add it linearly
    strata     = NULL)     # free baseline for each sex

print(opt_sirspline)          # shows the best 10 models by AIC

# 3. Grab the best converged model
best <- opt_sirspline$models[[ opt_sirspline$table$row[1] ]]

#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"
#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"
#":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"":":":":":":":":":"

st_1 <- sirspline( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), 
                   knots    = c(3, 3),           # 3 for age and 3 for year
                   reference.points = c(2010),
                   dependent.splines = TRUE)
st_1_lines<- extract_spline_data(st_1)
#plot(st_1, col=4, log=TRUE)
#title('Splines are dependent')


st_1_ind <- sirspline( coh.data = c_SISTRAT_c1, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines<- extract_spline_data(st_1_ind)
#plot(st_1_ind, col=4, log=TRUE)
#title('Splines are independent')


psych::describeBy(st_1_lines, group="spline")
#  Descriptive statistics by group 
# spline: 1
#              vars   n  mean    sd median trimmed   mad   min   max range  skew kurtosis   se
# spline          1 100  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN      NaN 0.00
# spline_value    2 100 39.00 12.31  39.00   39.00 15.72 18.00 60.00 42.00  0.00    -1.24 1.23
# level           3 100  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN      NaN 0.00
# estimate        4 100  3.59  0.37   3.66    3.62  0.45  2.72  4.03  1.31 -0.54    -0.88 0.04
# lower_ci        5 100  3.27  0.44   3.37    3.30  0.52  2.41  3.78  1.37 -0.47    -1.17 0.04
# upper_ci        6 100  3.94  0.34   4.04    3.99  0.26  3.04  4.29  1.25 -1.15     0.28 0.03
# ------------------------------------------------------------------------------------------------------------------------ 
# spline: 2
#              vars   n    mean   sd  median trimmed  mad     min     max range  skew kurtosis   se
# spline          1 100    2.00 0.00    2.00    2.00 0.00    2.00    2.00  0.00   NaN      NaN 0.00
# spline_value    2 100 2015.00 2.93 2015.00 2015.00 3.74 2010.00 2020.00 10.00  0.00    -1.24 0.29
# level           3 100    1.00 0.00    1.00    1.00 0.00    1.00    1.00  0.00   NaN      NaN 0.00
# estimate        4 100    1.17 0.08    1.19    1.18 0.09    1.00    1.27  0.27 -0.52    -0.99 0.01
# lower_ci        5 100    0.98 0.04    1.00    0.99 0.02    0.86    1.02  0.16 -1.36     0.61 0.00
# upper_ci        6 100    1.41 0.18    1.45    1.43 0.19    1.00    1.61  0.61 -0.71    -0.68 0.02
smrspline_data<- 
rbind.data.frame(
cbind.data.frame(type="dependent",st_1_lines)#, cbind.data.frame(type="independent",st_1_ind_lines)
                 )%>%
  group_by(spline) |> 
  mutate(
    spline_value_normalized = (spline_value - min(spline_value)) / (max(spline_value) - min(spline_value))
  ) |> 
  ungroup() |> 
  mutate(spline= factor(spline, levels = c("year", "agegroup"), labels = c("Calendar year", "Age at discharge")))

cat("Plot")
ggplot(smrspline_data, aes(x = spline_value_normalized, y = estimate, color = spline, fill = spline))+ #
  geom_line()+
  geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci, color = spline, fill = spline), alpha = 0.3)+
  scale_x_continuous(
    name = "Calendar year", # Nombre de tu eje X
    breaks = (seq(2010, 2020, by = 2) - 2010) / (2020 - 2010), # Define los quiebres cada 5 unidades
    labels = paste0(seq(2010, 2020, by = 2), ""), # Etiquetas de los quiebres
    sec.axis = sec_axis(~., breaks= (seq(18, 60, by = 5) - 18) / (60 - 18), labels = paste0(seq(18, 60, by = 5), ""), name= "Age at discharge")
  )+
  labs(x = "Spline Value", y = "SMR", color = "Variable", fill = "Variable")+
  theme_sjPlot_manual()+
  theme(legend.position="bottom")+
  geom_hline(yintercept = 1, linetype = "dashed", color = "black")+#smallest value is the reference point (where SIR = 1)
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),       # Quitar grids
    panel.background = element_blank(), # Fondo blanco sin líneas
    #axis.text.x = element_text(angle = 45, hjust = 1), # Rotar etiquetas eje x 45 grados
    strip.text = element_text(face = "bold", size=12)
    )+
  #scale_y_continuous(trans = "exp", breaks = c(1, 2, 3, 4, 5),
  #                       name = "SMR") +          # removes the need for readers to exponentiate
  scale_color_manual(name = "Variable",
                     values = c("Calendar year"="grey40",
                                "Age at discharge"="#4DB3FF"))+
  scale_fill_manual(name = "Variable", values = c("Calendar year"="gray60", "Age at discharge"="lightblue"))
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Fig2_SplineSMR.png"), dpi = 600, width = 7, height = 4.5)
   combination knots      AIC
6           10   3,4 484.2027
5            9   3,3 485.1836
8           12   4,4 485.3776
9           13   5,4 485.3776
7           11   4,3 486.3777
10          14   5,3 486.3777
3            7   2,4 489.9243
2            6   2,3 490.8532
4            8   2,5 491.6667
11          15   5,2 491.9440
1            5   2,2 496.8448
Top models by AIC
 row df_combo      AIC      BIC n_par warning
   7      2,2 481.9883 506.7616    10        
   8      3,2 481.9883 506.7616    10        
   9      4,2 485.2628 517.4682    13        
  10      5,2 485.2628 517.4682    13        
  12      2,3 486.0548 518.2602    13        
  13      3,3 486.0548 518.2602    13        
  17      2,4 489.9686 529.6060    16        
  18      3,4 489.9686 529.6060    16        
   2      2,1 491.7196 509.0609     7        
   3      3,1 491.7196 509.0609     7        

 Descriptive statistics by group 
spline: 1
             vars   n  mean    sd median trimmed   mad   min   max range  skew
spline          1 100  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN
spline_value    2 100 39.00 12.31  39.00   39.00 15.72 18.00 60.00 42.00  0.00
level           3 100  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN
estimate        4 100  3.59  0.37   3.66    3.62  0.45  2.72  4.03  1.31 -0.54
lower_ci        5 100  3.27  0.44   3.37    3.30  0.52  2.41  3.78  1.37 -0.47
upper_ci        6 100  3.94  0.34   4.04    3.99  0.26  3.04  4.29  1.25 -1.15
             kurtosis   se
spline            NaN 0.00
spline_value    -1.24 1.23
level             NaN 0.00
estimate        -0.88 0.04
lower_ci        -1.17 0.04
upper_ci         0.28 0.03
------------------------------------------------------------ 
spline: 2
             vars   n    mean   sd  median trimmed  mad     min     max range
spline          1 100    2.00 0.00    2.00    2.00 0.00    2.00    2.00  0.00
spline_value    2 100 2015.00 2.93 2015.00 2015.00 3.74 2010.00 2020.00 10.00
level           3 100    1.00 0.00    1.00    1.00 0.00    1.00    1.00  0.00
estimate        4 100    1.17 0.08    1.19    1.18 0.09    1.00    1.27  0.27
lower_ci        5 100    0.98 0.04    1.00    0.99 0.02    0.86    1.02  0.16
upper_ci        6 100    1.41 0.18    1.45    1.43 0.19    1.00    1.61  0.61
              skew kurtosis   se
spline         NaN      NaN 0.00
spline_value  0.00    -1.24 0.29
level          NaN      NaN 0.00
estimate     -0.52    -0.99 0.01
lower_ci     -1.36     0.61 0.00
upper_ci     -0.71    -0.68 0.02
Plot
Age- and calendar-year patterns in standardised mortality ratios

Age- and calendar-year patterns in standardised mortality ratios

Code
# sir_tot_illicit<- popEpi::sir( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
#                                ref.data = mx_1x1_banded, 
#                                ref.rate = 'haz', 
#                                adjust = c('agegroup','year','sex'), 
#                                EAR=T)#Excess Absolute Risks
# 
# sir_tot_licit<- popEpi::sir( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
#            ref.data = mx_1x1_banded, 
#            ref.rate = 'haz', 
#            adjust = c('agegroup','year','sex'), 
#            EAR=T)#Excess Absolute Risks



st_1_illicit <- sirspline( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), dependent.splines = TRUE)
st_1_lines_illicit<- extract_spline_data(st_1_illicit)
st_1_ind_illicit <- sirspline( coh.data = c_SISTRAT_c1_illicit, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines_illicit<- extract_spline_data(st_1_ind_illicit)


st_1_licit <- sirspline( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), dependent.splines = TRUE)
st_1_lines_licit<- extract_spline_data(st_1_licit)
st_1_ind_licit <- sirspline( coh.data = c_SISTRAT_c1_licit, coh.obs = 'from0to1', coh.pyrs = 'pyrs', 
                   ref.data = mx_1x1_banded, ref.rate = 'haz', 
                   adjust = c('agegroup','year','sex'),
                   spline = c('agegroup','year'), dependent.splines = F)
st_1_ind_lines_licit<- extract_spline_data(st_1_ind_licit)


smrspline_data_illicit<- 
rbind.data.frame(
cbind.data.frame(subs="illicit", type="dependent",st_1_lines_illicit),
cbind.data.frame(subs="illicit", type="independent",st_1_ind_lines_illicit),
cbind.data.frame(subs="licit", type="dependent",st_1_lines_licit),
cbind.data.frame(subs="licit", type="independent",st_1_ind_lines_licit)
                 )

cat("Plot")
ggplot(smrspline_data_illicit, aes(x = spline_value, y = estimate, color = factor(type))) +
  geom_line() +
  geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci, fill = factor(type)), alpha = 0.3) +
  facet_wrap(~ spline+subs, scales = "free_x", labeller = as_labeller(
    c(agegroup = "Age at Discharge", year = "Calendar Year", illicit= "Illicit substances", licit= "Licit substances (alcohol)"))) +
  labs(x = "Spline Value", y = "log(SMR)", color = "All splines in the same model", fill = "All splines in the same model") +
  theme_sjPlot_manual()+
  theme(legend.position="bottom")+
  geom_hline(yintercept = 1, linetype = "dashed", color = "black")+#smallest value is the reference point (where SIR = 1)
  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),       # Quitar grids
    panel.background = element_blank(), # Fondo blanco sin líneas
    axis.text.x = element_text(angle = 45, hjust = 1), # Rotar etiquetas eje x 45 grados
    strip.text = element_text(face = "bold", size=12)
    )+
  scale_y_continuous(trans = "exp", breaks = c(2, 6, 8, 10),
                         name = "SMR") +          # removes the need for readers to exponentiate
  scale_color_manual(name = "Spline specification",
                     values = c("dependent"="grey40",
                                "independent"="#4DB3FF"))+
  scale_fill_manual(name = "Spline specification", values = c("dependent"="gray60",
                                                              "independent"="lightblue"))
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Fig_SplineSMR_illicit.png"), dpi = 600, width = 7, height = 4.5)


prop.table(table(clean_df$disch_age_cat, clean_df$prim_sub_licit),1) |>
   data.frame() |> 
  pivot_wider(names_from=Var2, values_from=Freq) |> 
  mutate(across(c("illicit", "licit"), ~sprintf("%.1f",.*100) )) |> 
  knitr::kable("markdown", caption="Percentages of primary substance of use, by discharge age group")
Plot
Percentages of primary substance of use, by discharge age group
Var1 illicit licit
18-29 83.1 16.9
30-44 68.2 31.8
45-59 36.4 63.6
60+ 14.5 85.5

Age- and calendar-year patterns in standardised mortality ratios

Age- and calendar-year patterns in standardised mortality ratios

Age- and calendar-year patterns in standardised mortality ratios

Stantardized mortality rates- Direct

First, we formatted population data by calendar years grouped ages and sex. This allows us to introduce further the weights for each strata. We calculated the weights separately for each strata (WL), and jointly (proy_ine_reg_group_25_june_every).

Code
proy_ine_reg_group_25_june<-
  proy_ine_com|> 
  #2025 to resemble SER 2024, BUT NOW 18+ INSTEAD OF 15
  #2025-06-11: I need to expand so people older that stayed in treatment could fit
  dplyr::filter(Edad>=18, Edad<76)|> 
  #format to match previous
  dplyr::mutate(reg_res= sprintf("%02d", Region))|> 
  dplyr::ungroup()|> 
  dplyr::mutate(edad_anos_rec= dplyr::case_when(Edad>=18 & Edad<30~18,
                                                Edad>=30 & Edad<45~30,
                                                Edad>=45 & Edad<60~45,
                                                Edad>=60 & Edad<76~60,
                                                T~NA_real_))|>  
  dplyr::group_by(anio, `Sexo (1=Hombre 2=Mujer)`,edad_anos_rec)|>
  dplyr::summarise(poblacion= sum(poblacion, na.rm=T))|> 
  dplyr::rename("sex"="Sexo (1=Hombre 2=Mujer)", "agegroup"="edad_anos_rec", "year"="anio")|> 
  dplyr::mutate(sex= ifelse(sex==2, "Female", "Male")) |>
  ungroup()

summarise() has grouped output by ‘anio’, ‘Sexo (1=Hombre 2=Mujer)’. You can override using the .groups argument.

Code
# In the case that you employ more than one adjusting variable, separate weights should be passed to
# match to the levels of the different adjusting variables. When supplied correctly, "grand" weights
# are formed based on the variable-specific weights by multiplying over the variable-specific weights
# (e.g. if men have w = 0.5 and the age group 0-4 has w = 0.1, the "grand" weight for men aged 0-4
#   is 0.5*0.1). The "grand" weights are then used for adjusting after ensuring they sum to one.
# When using multiple adjusting variables, you are allowed to pass either a named list of weights
# or a data.frame of weights. E.g.
# WL <- list(agegroup = age_w, sex = sex_w) 
proy_ine_reg_group_25_june_sex<-
proy_ine_reg_group_25_june|>  
  #filter(year == 2020)|>  
  dplyr::summarise(pop = sum(poblacion), .by = sex) |> 
  mutate(w=pop/sum(pop))
proy_ine_reg_group_25_june_age<-
  proy_ine_reg_group_25_june|>  
  #filter(year == 2020)|>  
  dplyr::summarise(pop = sum(poblacion), .by = agegroup) |> 
  mutate(w=pop/sum(pop))
proy_ine_reg_group_25_june_year<-
  proy_ine_reg_group_25_june|>  
  #filter(year == 2020)|>  
  dplyr::summarise(pop = sum(poblacion), .by = year) |> 
  mutate(w=pop/sum(pop))

WL <- list(
  year=proy_ine_reg_group_25_june_year$w, 
           agegroup = proy_ine_reg_group_25_june_age$w, 
           sex = proy_ine_reg_group_25_june_sex$w) 

proy_ine_reg_group_25_june_every<- 
proy_ine_reg_group_25_june|>  
  group_by(year, agegroup, sex)|> 
  dplyr::summarise(pop = sum(poblacion))|> 
  ungroup()|> 
  mutate(weights=pop/sum(pop))

summarise() has grouped output by ‘year’, ‘agegroup’. You can override using the .groups argument.

Code
weights_df <- data.table::as.data.table(proy_ine_reg_group_25_june_every)[
  , .(year        = as.integer(year),               # num o int
      agegroup    = as.integer(agegroup),
      sex         = factor(sex, levels = c("Male","Female")),  # ¡factor!
      weights     = as.numeric(weights))            # num
]

weights_adj_sex <- data.table::as.data.table(weights_df)[ ,
                                          .(w = sum(weights)),           # collapse over sex
                                          by = .(year, agegroup)
]
#weights_adj_sex[, sum(w)]            # should be 1

We calculate dispersion parameter

Code
extract_phi_dir <- function(df) {
    # Model with weights vector
    model_poisson_weighted <- glm(
        from0to1 ~ factor(agegroup) + factor(sex) + factor(year),
        family = poisson,
        offset = log(pyrs),
        data = df
    )
    
    # Calculate φ
    pearson_chisq <- sum(residuals(model_poisson_weighted, type = "pearson")^2)
    df_residual <- df.residual(model_poisson_weighted)
    dispersion_index <- pearson_chisq / df_residual
    
    return(dispersion_index)
}
# Estimate φ with the same stratification used in the DSR, otherwise residual heterogeneity explodes the statistic.
# Use one φ per subgroup whenever you report subgroup-specific DSRs.
# Always include in the GLM every variable whose variation you do not want counted as dispersion.
# Here, I allow the mean to change with both calendar and age. However, residual degrees of freedom remain because you haven't included the interaction. To estimate overdispersion, I need some residual degrees of freedom: I'll either remove the interaction or just use the intercept within each group.
extract_phi_by_age <- function(df) {
    df %>%
  group_by(agegroup) %>%
  dplyr::reframe({
    # This entire block is now run for each group independently
    
    # Fit the model ONLY on the data from the current group
    m <- glm(from0to1 ~ factor(sex) + factor(year),
             offset = log(pyrs), 
             family = poisson, 
             data = pick(everything())) # pick() correctly gets the group's data
    
    # Return the result as a one-row tibble (or data.frame)
    tibble(phi = sum(residuals(m, type = "pearson")^2) / df.residual(m))
  })
}

extract_phi_by_sex <- function(df) {
    df %>%
        group_by(sex) %>%
  dplyr::reframe({
    # This entire block is now run for each group independently
    
    # Fit the model ONLY on the data from the current group
    m <- glm(from0to1 ~ factor(agegroup) + factor(year),
             offset = log(pyrs), 
             family = poisson, 
             data = pick(everything())) # pick() correctly gets the group's data
    
    # Return the result as a one-row tibble (or data.frame)
    tibble(phi = sum(residuals(m, type = "pearson")^2) / df.residual(m))
  })
}    

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

cat("Test of stratified DSR using Lexis and glm")

## 1. joint weights = w_year * w_age  (no sex weight)
w_year <- proy_ine_reg_group_25_june_year  %>%            # year | w
    transmute(year, w_year = w)

w_age  <- proy_ine_reg_group_25_june_age   %>%            # agegroup | w
    transmute(agegroup, w_age = w)
w_tab<- 
crossing(w_year, w_age)|>
    dplyr::mutate(w = w_year * w_age)|>
    dplyr::select(year, agegroup, w)|> 
    dplyr::mutate(w = w / sum(w))

## 2. merge with Lexis table
lexis_w <- merge(data.table::as.data.table(c_SISTRAT_c1), w_tab,
                 by = c("year", "agegroup"), all = FALSE)

## 3. saturated model (every cell gets its own parameter)
mod_sat <- glm(
  from0to1 ~ sex:factor(year):factor(agegroup) - 1,       # sin intercepto
  offset   = log(pyrs),
  family   = poisson,
  data     = lexis_w
)

## 4. fitted stratum-specific rates = observed rates
## 5. direct standardised rate and CI
## 6. present per 1000 person-years like popEpi::rate()
lexis_w <- lexis_w %>% mutate(r_hat = fitted(mod_sat) / pyrs)

dsr_tab <- lexis_w %>%
  group_by(sex) %>%
  summarise(
    DSR = sum(w * r_hat), #weight the predictor
    var = sum(w^2 * from0to1 / pyrs^2),
    .groups = "drop"
  ) %>%
  mutate(
    SE  = sqrt(var),
    LCI = DSR * exp(-qnorm(.975) * SE / DSR),
    UCI = DSR * exp( qnorm(.975) * SE / DSR)
  ) %>%
  mutate(across(c(DSR, LCI, UCI), ~ .x * 1000))           # por 1 000 py

cat("The result should be the same of rate_sex\n")

#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:
## 1. joint weights = w_year * w_age  (no sex weight)
w_sex  <- proy_ine_reg_group_25_june_sex   %>%            # agegroup | w
    transmute(sex, w_sex = w)
w_tab_agegroup<- 
crossing(w_year, w_sex)|>
    dplyr::mutate(w = w_year * w_sex)|>
    dplyr::select(year, sex, w)|> 
    dplyr::mutate(w = w / sum(w))

## 2. merge with Lexis table
lexis_w_age <- merge(data.table::as.data.table(c_SISTRAT_c1), w_tab_agegroup,
                 by = c("year", "sex"), all = FALSE)

## 3. saturated model (every cell gets its own parameter)
mod_sat_age <- glm(
  from0to1 ~ sex:factor(year):factor(agegroup) - 1,       # sin intercepto
  offset   = log(pyrs),
  family   = poisson,
  data     = lexis_w_age
)

## 4. fitted stratum-specific rates = observed rates
## 5. direct standardised rate and CI
## 6. present per 1000 person-years like popEpi::rate()
lexis_w_age <- lexis_w_age %>% mutate(r_hat = fitted(mod_sat_age) / pyrs)

dsr_tab_age <- lexis_w_age %>%
  group_by(agegroup) %>%
  summarise(
    DSR = sum(w * r_hat), #weight the predictor
    var = sum(w^2 * from0to1 / pyrs^2),
    .groups = "drop"
  ) %>%
  mutate(
    SE  = sqrt(var),
    LCI = DSR * exp(-qnorm(.975) * SE / DSR),
    UCI = DSR * exp( qnorm(.975) * SE / DSR)
  ) %>%
  mutate(across(c(DSR, LCI, UCI), ~ .x * 1000))           # por 1 000 py

cat("The result should be the same of rate_agegroup\n")

Now we calculate the directly-standardized mortality rates.

Code
r2_adj <- rate(
  data    = c_SISTRAT_c1,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df #weights inglm should be applied in the offset
)
DSR_1k <- mapply(
  dsr_format,                 # FUN
  r2_adj$rate.adj,           # primer vector (rate)
  r2_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_adj$rate.adj,           # primer vector (rate)
    r2_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))



r2_adj <- rate(
  data    = c_SISTRAT_c1,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df #weights inglm should be applied in the offset
)

r2_adj_fot <- rate(
  data    = c_SISTRAT_c1_fot,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df, #weights inglm should be applied in the offset
  print= "fot"
)

r2_sex_fot <- rate( data = c_SISTRAT_c1_fot, 
            obs = from0to1, 
            pyrs = pyrs, 
            print = c("sex","fot"),
            adjust = c("year", "agegroup"),
            weights = list(
              year=proy_ine_reg_group_25_june_year$w, 
              agegroup = proy_ine_reg_group_25_june_age$w 
              #sex = proy_ine_reg_group_25_june_sex$w) 
            )
)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sex

r2_sex <- rate( data = c_SISTRAT_c1, 
            obs = from0to1, 
            pyrs = pyrs, 
            print = "sex",
            adjust = c("year", "agegroup"),
            weights = list(
              year=proy_ine_reg_group_25_june_year$w, 
              agegroup = proy_ine_reg_group_25_june_age$w 
              #sex = proy_ine_reg_group_25_june_sex$w) 
            )
)
DSR_1k_sex <- mapply(
  dsr_format,                 # FUN
  r2_sex$rate.adj,           # primer vector (rate)
  r2_sex$SE.rate.adj,        # segundo vector (se)
  1,
  MoreArgs = list(            # argumentos fijos extra
    #phi    = 3.666913,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_1k_corr_sex <- mapply(
    dsr_format_corr,
    r2_sex$rate.adj,
    r2_sex$SE.rate.adj,
    phi     = extract_phi_by_sex(c_SISTRAT_c1)$phi[match(r2_sex$sex, extract_phi_by_sex(c_SISTRAT_c1)$sex)],
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)


#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; age
r2_agegr <- rate( data = c_SISTRAT_c1, 
                obs = from0to1, 
                pyrs = pyrs, 
                print = "agegroup",
                adjust = c("year", "sex"),
                weights = list(
                  year=proy_ine_reg_group_25_june_year$w, 
                  #agegroup = proy_ine_reg_group_25_june_age$w 
                  sex = proy_ine_reg_group_25_june_sex$w) 
                )
DSR_1k_agegr <- mapply(
  dsr_format,                 # FUN
  r2_agegr$rate.adj,           # primer vector (rate)
  r2_agegr$SE.rate.adj,        # segundo vector (se)
  1,
  MoreArgs = list(            # argumentos fijos extra
    #phi    = 3.666913,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_1k_corr_agegr <- mapply(
    dsr_format_corr,
    r2_agegr$rate.adj,
    r2_agegr$SE.rate.adj,
    phi     = extract_phi_by_age(c_SISTRAT_c1)$phi[match(r2_agegr$agegroup, extract_phi_by_age(c_SISTRAT_c1)$agegroup)],  # right φ
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
r2_amb_adj <- rate(
  data    = c_SISTRAT_c1_amb,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)

DSR_amb_1k <- mapply(
  dsr_format,                 # FUN
  r2_amb_adj$rate.adj,           # primer vector (rate)
  r2_amb_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_amb_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_amb_adj$rate.adj,           # primer vector (rate)
    r2_amb_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_amb),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_______#_______#_______#_______#
r2_res_adj <- rate(
  data    = c_SISTRAT_c1_res,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)

DSR_res_1k <- mapply(
  dsr_format,                 # FUN
  r2_res_adj$rate.adj,           # primer vector (rate)
  r2_res_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_res_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_res_adj$rate.adj,           # primer vector (rate)
    r2_res_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_res),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))
#_______#_______#_______#_______#
r2_illicit_adj <- rate(
  data    = c_SISTRAT_c1_illicit,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_illicit_1k <- mapply(
  dsr_format,                 # FUN
  r2_illicit_adj$rate.adj,           # primer vector (rate)
  r2_illicit_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_illicit_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_illicit_adj$rate.adj,           # primer vector (rate)
    r2_illicit_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_illicit),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_______#_______#_______#_______#
r2_licit_adj <- rate(
  data    = c_SISTRAT_c1_licit,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_licit_1k <- mapply(
  dsr_format,                 # FUN
  r2_licit_adj$rate.adj,           # primer vector (rate)
  r2_licit_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_licit_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_licit_adj$rate.adj,           # primer vector (rate)
    r2_licit_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_licit),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_______#_______#_______#_______#
r2_comp_adj <- rate(
  data    = c_SISTRAT_c1_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_comp_1k <- mapply(
  dsr_format,                 # FUN
  r2_comp_adj$rate.adj,           # primer vector (rate)
  r2_comp_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_comp_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_comp_adj$rate.adj,           # primer vector (rate)
    r2_comp_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_comp),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_______#_______#_______#_______#
r2_notcomp_adj <- rate(
  data    = c_SISTRAT_c1_not_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_notcomp_1k <- mapply(
  dsr_format,                 # FUN
  r2_notcomp_adj$rate.adj,           # primer vector (rate)
  r2_notcomp_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_notcomp_1k_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_notcomp_adj$rate.adj,           # primer vector (rate)
    r2_notcomp_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_not_comp),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
  cbind.data.frame(var="Total", t(r2_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_1k_corr),
  cbind.data.frame(var=c("Male","Female"), matrix(r2_sex[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir=matrix(DSR_1k_corr_sex, ncol=1)),
  cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir=matrix(DSR_1k_corr_agegr, ncol=1)), 
  cbind.data.frame(var="Ambulatory", t(r2_amb_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_amb_1k_corr),
  cbind.data.frame(var="Residential", t(r2_res_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_res_1k_corr),
  cbind.data.frame(var="Illicit", t(r2_illicit_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_illicit_1k_corr),
  cbind.data.frame(var="Licit", t(r2_licit_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_licit_1k_corr),
  cbind.data.frame(var="Completed", t(r2_comp_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_comp_1k_corr),
  cbind.data.frame(var="Not completed", t(r2_notcomp_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir=DSR_notcomp_1k_corr)#,
  )|> 
      (\(df) {
    df->> df_smr_dir_a
    df
  })()|> 
  dplyr::mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|> 
  dplyr::mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
  dplyr::select(-any_of(2:7))|>
   extract(
    SMR_dir,
    into   = c("est", "low", "high"),
    regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
    convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                 SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>  
  dplyr::select(-est, -low, -high)|>
  dplyr::rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci") |> 
  knitr::kable("markdown", caption= "SMRs, direct method")
SMRs, direct method
var CMR DSR DSR (SEs robust to dispersion)
Total 8.5 (8.2–8.8) 13.1 (9.0–19.2) 13.1 (8.8–19.5)
Male 8.9 (8.6–9.3) 18.5 (9.5–35.7) 18.5 (8.1–42.0)
Female 7.0 (6.5–7.6) 9.1 (7.5–11.0) 9.1 (7.7–10.6)
18-29 2.9 (2.5–3.3) 2.9 (2.4–3.4) 2.9 (2.5–3.4)
30-44 5.8 (5.5–6.2) 5.7 (4.8–6.7) 5.7 (4.7–6.9)
45-59 16.4 (15.5–17.3) 14.2 (12.9–15.7) 14.2 (13.4–15.1)
60+ 31.5 (28.4–34.9) 46.7 (20.7–105.4) 46.7 (21.2–102.9)
Ambulatory 8.2 (7.9–8.5) 13.2 (9.0–19.3) 13.2 (8.9–19.5)
Residential 10.0 (9.2–10.9) 10.4 (8.9–12.0) 10.3 (9.1–11.8)
Illicit 5.1 (4.8–5.4) 12.1 (5.8–25.3) 12.1 (4.6–31.3)
Licit 16.2 (15.4–16.9) 14.7 (12.8–16.8) 14.7 (12.9–16.7)
Completed 8.1 (7.5–8.7) 8.0 (6.5–9.8) 8.0 (6.6–9.6)
Not completed 8.6 (8.3–8.9) 15.2 (10.4–22.2) 15.2 (10.4–22.2)

CMRs do not have 95% CI corrected for dispersion.

We replicate the analysis for the last treatment.

Code
invisible("B) Last treatment")

r2_b_adj <- rate(
  data    = c_SISTRAT_c1_b,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_1k_b_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_b_adj$rate.adj,           # primer vector (rate)
    r2_b_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_b),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; sex
r2_sex_b <- rate( data = c_SISTRAT_c1_b, 
                obs = from0to1, 
                pyrs = pyrs, 
                print = "sex",
                adjust = c("year", "agegroup"),
                weights = list(
                  year=proy_ine_reg_group_25_june_year$w, 
                  agegroup = proy_ine_reg_group_25_june_age$w 
                  #sex = proy_ine_reg_group_25_june_sex$w) 
                )
)
DSR_1k_corr_sex_b <- mapply(
    dsr_format_corr,
    r2_sex_b$rate.adj,
    r2_sex_b$SE.rate.adj,
    phi     = extract_phi_by_sex(c_SISTRAT_c1_b)$phi[match(r2_sex_b$sex, extract_phi_by_sex(c_SISTRAT_c1_b)$sex)],
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; age
r2_agegr_b <- rate( data = c_SISTRAT_c1_b, 
                  obs = from0to1, 
                  pyrs = pyrs, 
                  print = "agegroup",
                  adjust = c("year", "sex"),
                  weights = list(
                    year=proy_ine_reg_group_25_june_year$w, 
                    #agegroup = proy_ine_reg_group_25_june_age$w 
                    sex = proy_ine_reg_group_25_june_sex$w) 
)
DSR_1k_corr_agegr_b <- mapply(
    dsr_format_corr,
    r2_agegr_b$rate.adj,
    r2_agegr_b$SE.rate.adj,
    phi     = extract_phi_by_age(c_SISTRAT_c1_b)$phi[match(r2_agegr_b$agegroup, extract_phi_by_age(c_SISTRAT_c1_b)$agegroup)],  # right φ
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

r2_amb_b_adj <- rate(
  data    = c_SISTRAT_c1_b_amb,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_amb_1k_b <- mapply(
  dsr_format,                 # FUN
  r2_amb_b_adj$rate.adj,           # primer vector (rate)
  r2_amb_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_amb_1k_b_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_amb_b_adj$rate.adj,           # primer vector (rate)
  r2_amb_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_b_amb),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_res_b_adj <- rate(
  data    = c_SISTRAT_c1_b_res,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_res_1k_b <- mapply(
  dsr_format,                 # FUN
  r2_res_b_adj$rate.adj,           # primer vector (rate)
  r2_res_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_res_1k_b_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_res_b_adj$rate.adj,           # primer vector (rate)
  r2_res_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_b_res),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_illicit_b_adj <- rate(
  data    = c_SISTRAT_c1_b_illicit,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_illicit_1k_b <- mapply(
  dsr_format,                 # FUN
  r2_illicit_b_adj$rate.adj,           # primer vector (rate)
  r2_illicit_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_illicit_1k_b_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_illicit_b_adj$rate.adj,           # primer vector (rate)
  r2_illicit_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_b_illicit),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_licit_b_adj <- rate(
  data    = c_SISTRAT_c1_b_licit,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_licit_1k_b <- mapply(
  dsr_format,                 # FUN
  r2_licit_b_adj$rate.adj,           # primer vector (rate)
  r2_licit_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_licit_1k_b_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_licit_b_adj$rate.adj,           # primer vector (rate)
  r2_licit_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_b_licit),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_comp_b_adj <- rate(
  data    = c_SISTRAT_c1_b_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_comp_1k_b <- mapply(
dsr_format,                 # FUN
r2_comp_b_adj$rate.adj,           # primer vector (rate)
r2_comp_b_adj$SE.rate.adj,        # segundo vector (se)
MoreArgs = list(            # argumentos fijos extra
  phi    = 1,
  factor = 1e3,
  digits = 1,
  conf   = 0.95))
DSR_comp_1k_b_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_comp_b_adj$rate.adj,           # primer vector (rate)
  r2_comp_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_b_comp),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_notcomp_b_adj <- rate(
  data    = c_SISTRAT_c1_b_not_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_notcomp_1k_b <- mapply(
  dsr_format,                 # FUN
  r2_notcomp_b_adj$rate.adj,           # primer vector (rate)
  r2_notcomp_b_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_notcomp_1k_b_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_notcomp_b_adj$rate.adj,           # primer vector (rate)
    r2_notcomp_b_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_b_not_comp),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
  cbind.data.frame(var="Total", t(r2_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_b_corr),
  cbind.data.frame(var=c("Male","Female"), matrix(r2_sex_b[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_sex_b, ncol=1)),
  cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr_b[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_agegr_b, ncol=1)),
  cbind.data.frame(var="Ambulatory", t(r2_amb_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_amb_1k_b_corr),
  cbind.data.frame(var="Residential", t(r2_res_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_res_1k_b_corr),
  cbind.data.frame(var="Illicit", t(r2_illicit_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_illicit_1k_b_corr),
  cbind.data.frame(var="Licit", t(r2_licit_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_licit_1k_b_corr),  
  cbind.data.frame(var="Completed", t(r2_comp_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_comp_1k_b_corr),    
  cbind.data.frame(var="Not completed", t(r2_notcomp_b_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_notcomp_1k_b_corr)
  )|>
    (\(df) {
    df->> df_smr_dir_b
    df
  })()|> 
  mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|> 
  mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
  dplyr::select(-any_of(2:7))|>
  extract(
      SMR_dir,
      into   = c("est", "low", "high"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                 SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>  
  dplyr::select(-est, -low, -high)|>  
  rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|> 
  knitr::kable("markdown", caption= "SMRs, direct method, last treatment followed")
SMRs, direct method, last treatment followed
var CMR DSR DSR (SEs robust to dispersion)
Total 9.5 (9.2–9.9) 15.3 (10.1–23.1) 15.3 (10.0–23.3)
Male 10.0 (9.6–10.4) 21.9 (10.8–44.6) 21.9 (9.1–53.0)
Female 8.0 (7.4–8.7) 10.3 (8.4–12.5) 10.3 (8.8–12.0)
18-29 3.3 (2.9–3.8) 3.6 (3.0–4.5) 3.6 (3.1–4.3)
30-44 6.7 (6.3–7.1) 7.3 (5.8–9.0) 7.3 (5.7–9.2)
45-59 17.8 (16.9–18.8) 16.4 (14.7–18.2) 16.4 (15.3–17.4)
60+ 32.7 (29.5–36.3) 53.3 (21.5–132.2) 53.3 (24.1–117.7)
Ambulatory 9.1 (8.8–9.5) 15.1 (9.9–23.0) 15.1 (9.8–23.1)
Residential 11.7 (10.8–12.7) 12.7 (10.8–14.9) 12.7 (11.1–14.5)
Illicit 5.7 (5.4–6.0) 15.1 (6.4–35.9) 15.1 (5.0–45.3)
Licit 18.1 (17.3–19.0) 17.1 (14.7–19.9) 17.1 (14.7–20.0)
Completed 8.8 (8.2–9.5) 9.1 (7.3–11.4) 9.1 (7.6–11.0)
Not completed 9.8 (9.4–10.2) 17.5 (11.6–26.5) 17.5 (11.6–26.5)

CMRs do not have 95% CI corrected for dispersion.

Now we included unfinished treatments (referral outside SENDA network and ongoing treatments)

Code
invisible("C) Last treatment")

r2_c_adj <- rate(
  data    = c_SISTRAT_c1_c,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_1k_c_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_c_adj$rate.adj,           # primer vector (rate)
    r2_c_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_c),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; sex
r2_sex_c <- rate( data = c_SISTRAT_c1_c, 
                obs = from0to1, 
                pyrs = pyrs, 
                print = "sex",
                adjust = c("year", "agegroup"),
                weights = list(
                  year=proy_ine_reg_group_25_june_year$w, 
                  agegroup = proy_ine_reg_group_25_june_age$w 
                  #sex = proy_ine_reg_group_25_june_sex$w) 
                )
)
DSR_1k_corr_sex_c <- mapply(
    dsr_format_corr,
    r2_sex_c$rate.adj,
    r2_sex_c$SE.rate.adj,
    phi     = extract_phi_by_sex(c_SISTRAT_c1_c)$phi[match(r2_sex_c$sex, extract_phi_by_sex(c_SISTRAT_c1_c)$sex)],
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###### SIR 1x1; 2025-june; Direct; estimate; sensitivity; age
r2_agegr_c <- rate( data = c_SISTRAT_c1_c, 
                  obs = from0to1, 
                  pyrs = pyrs, 
                  print = "agegroup",
                  adjust = c("year", "sex"),
                  weights = list(
                    year=proy_ine_reg_group_25_june_year$w, 
                    #agegroup = proy_ine_reg_group_25_june_age$w 
                    sex = proy_ine_reg_group_25_june_sex$w) 
)
DSR_1k_corr_agegr_c <- mapply(
    dsr_format_corr,
    r2_agegr_c$rate.adj,
    r2_agegr_c$SE.rate.adj,
    phi     = extract_phi_by_age(c_SISTRAT_c1_c)$phi[match(r2_agegr_c$agegroup, extract_phi_by_age(c_SISTRAT_c1_c)$agegroup)],  # right φ
    MoreArgs = list(factor = 1e3, digits = 6, conf = .95)
)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

r2_amb_c_adj <- rate(
  data    = c_SISTRAT_c1_c_amb,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_amb_1k_c <- mapply(
  dsr_format,                 # FUN
  r2_amb_c_adj$rate.adj,           # primer vector (rate)
  r2_amb_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_amb_1k_c_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_amb_c_adj$rate.adj,           # primer vector (rate)
  r2_amb_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_c_amb),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_res_c_adj <- rate(
  data    = c_SISTRAT_c1_c_res,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_res_1k_c <- mapply(
  dsr_format,                 # FUN
  r2_res_c_adj$rate.adj,           # primer vector (rate)
  r2_res_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_res_1k_c_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_res_c_adj$rate.adj,           # primer vector (rate)
  r2_res_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_c_res),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_illicit_c_adj <- rate(
  data    = c_SISTRAT_c1_c_illicit,
  obs     = from0to1,
  pyrs    = pyrs,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_illicit_1k_c <- mapply(
  dsr_format,                 # FUN
  r2_illicit_c_adj$rate.adj,           # primer vector (rate)
  r2_illicit_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_illicit_1k_c_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_illicit_c_adj$rate.adj,           # primer vector (rate)
  r2_illicit_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_c_illicit),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_licit_c_adj <- rate(
  data    = c_SISTRAT_c1_c_licit,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_licit_1k_c <- mapply(
  dsr_format,                 # FUN
  r2_licit_c_adj$rate.adj,           # primer vector (rate)
  r2_licit_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_licit_1k_c_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_licit_c_adj$rate.adj,           # primer vector (rate)
  r2_licit_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_c_licit),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_comp_c_adj <- rate(
  data    = c_SISTRAT_c1_c_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_comp_1k_c <- mapply(
dsr_format,                 # FUN
r2_comp_c_adj$rate.adj,           # primer vector (rate)
r2_comp_c_adj$SE.rate.adj,        # segundo vector (se)
MoreArgs = list(            # argumentos fijos extra
  phi    = 1,
  factor = 1e3,
  digits = 1,
  conf   = 0.95))
DSR_comp_1k_c_corr <- mapply(
  dsr_format_corr,                 # FUN
  r2_comp_c_adj$rate.adj,           # primer vector (rate)
  r2_comp_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
      phi    = extract_phi_dir(c_SISTRAT_c1_c_comp),
      factor = 1e3,
      digits = 6,
      conf   = 0.95))
#_______#_______#_______#_______#
r2_notcomp_c_adj <- rate(
  data    = c_SISTRAT_c1_c_not_comp,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df
)
DSR_notcomp_1k_c <- mapply(
  dsr_format,                 # FUN
  r2_notcomp_c_adj$rate.adj,           # primer vector (rate)
  r2_notcomp_c_adj$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = 1,
    factor = 1e3,
    digits = 1,
    conf   = 0.95))
DSR_notcomp_1k_c_corr <- mapply(
    dsr_format_corr,                 # FUN
    r2_notcomp_c_adj$rate.adj,           # primer vector (rate)
    r2_notcomp_c_adj$SE.rate.adj,        # segundo vector (se)
    MoreArgs = list(            # argumentos fijos extra
        phi    = extract_phi_dir(c_SISTRAT_c1_c_not_comp),
        factor = 1e3,
        digits = 6,
        conf   = 0.95))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
  cbind.data.frame(var="Total", t(r2_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_c_corr),
  cbind.data.frame(var=c("Male","Female"), matrix(r2_sex_c[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_sex_c, ncol=1)),
  cbind.data.frame(var=c("18-29","30-44", "45-59","60+"), matrix(r2_agegr_c[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)], ncol=6), SMR_dir= matrix(DSR_1k_corr_agegr_c, ncol=1)),
  cbind.data.frame(var="Ambulatory", t(r2_amb_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_amb_1k_c_corr),
  cbind.data.frame(var="Residential", t(r2_res_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_res_1k_c_corr),
  cbind.data.frame(var="Illicit", t(r2_illicit_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_illicit_1k_c_corr),
  cbind.data.frame(var="Licit", t(r2_licit_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_licit_1k_c_corr),  
  cbind.data.frame(var="Completed", t(r2_comp_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_comp_1k_c_corr),    
  cbind.data.frame(var="Not completed", t(r2_notcomp_c_adj[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_notcomp_1k_c_corr)
  )|>
    (\(df) {
    df->> df_smr_dir_c
    df
  })()|> 
  mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|> 
  mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
  dplyr::select(-any_of(2:7))|>
  extract(
      SMR_dir,
      into   = c("est", "low", "high"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                 SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>  
  dplyr::select(-est, -low, -high)|>  
  rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|> 
  knitr::kable("markdown", caption= "SMRs, direct method, unfinished treatments")
SMRs, direct method, unfinished treatments
var CMR DSR DSR (SEs robust to dispersion)
Total 9.2 (8.9–9.5) 12.7 (10.7–15.1) 12.7 (10.9–14.9)
Male 9.7 (9.4–10.1) 16.6 (12.3–22.5) 16.6 (12.2–22.7)
Female 7.6 (7.1–8.1) 9.5 (8.1–11.0) 9.5 (8.4–10.6)
18-29 3.1 (2.7–3.5) 2.9 (2.5–3.4) 2.9 (2.5–3.4)
30-44 6.3 (6.0–6.7) 6.2 (5.4–7.1) 6.2 (5.5–7.0)
45-59 17.8 (17.0–18.7) 16.4 (14.9–18.1) 16.4 (15.4–17.5)
60+ 34.9 (31.8–38.2) 37.5 (24.6–57.2) 37.5 (26.4–53.3)
Ambulatory 8.9 (8.6–9.2) 13.6 (10.1–18.3) 13.6 (10.2–18.2)
Residential 10.7 (10.0–11.6) 12.2 (10.5–14.2) 12.2 (10.8–13.9)
Illicit 5.4 (5.1–5.7) 11.6 (6.3–21.5) 11.6 (5.4–25.0)
Licit 18.0 (17.2–18.7) 16.6 (14.9–18.5) 16.6 (15.1–18.3)
Completed 9.2 (8.7–9.7) 10.0 (9.0–11.3) 10.0 (9.1–11.1)
Not completed 9.1 (8.8–9.5) 16.3 (11.4–23.2) 16.3 (11.6–22.9)
Code
# sr_1_sex_fot
# r2_sex_fot
rates_df_fot <- r2_adj_fot %>% 
  mutate(
    # conviene expresar la tasa por 1 000 persona-año
    rate_adj_1k     = rate.adj     * 1e3,
    rate_adj_lo_1k  = rate.adj.lo  * 1e3,
    rate_adj_hi_1k  = rate.adj.hi  * 1e3
  )
rates_sex_df_fot <- r2_sex_fot %>% 
  mutate(
    # conviene expresar la tasa por 1 000 persona-año
    rate_adj_1k     = rate.adj     * 1e3,
    rate_adj_lo_1k  = rate.adj.lo  * 1e3,
    rate_adj_hi_1k  = rate.adj.hi  * 1e3
  )

p_rate2 <- ggplot(rates_sex_df_fot, aes(x = fot, y = rate_adj_1k, fill=sex)) +
  geom_ribbon(aes(ymin = rate_adj_lo_1k, ymax = rate_adj_hi_1k, fill=sex),
              alpha = .20) +
  geom_line(aes(color=sex), size = .9) +
  geom_point(size = 2) +
  scale_x_continuous("Years since discharge",
                     breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
  scale_y_continuous("Adjusted rate \n(deaths ×1,000 PY)",
                     limits = c(0, NA)) +
  theme_minimal(base_family = "Times New Roman")+
  theme(axis.title.x = element_blank())+scale_colour_manual(
  values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
  values = c(Male = alpha("#2C3E8B", 0.25),
             Female = alpha("#E69F00", 0.25))
)

p_sir2 <- ggplot(sr_1_sex_fot, aes(x = fot, y = sir, fill= sex)) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "grey50") +
  geom_ribbon(aes(ymin = sir.lo, ymax = sir.hi, fill=sex),
              alpha = .20) +
  geom_line(aes(color=sex), size = .9) +
  geom_point(size = 2) +
  scale_x_continuous("Years since discharge",
                     breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
  theme_minimal(base_family = "Times New Roman")+
  theme(axis.title.x = element_blank())+ 
  scale_y_log10(
    "Adjusted SMR",
    breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25)),
    labels = scales::number_format(accuracy = 0.1)
  ) +scale_colour_manual(
  values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
  values = c(Male = alpha("#2C3E8B", 0.25),
             Female = alpha("#E69F00", 0.25))
)+ theme(legend.position="none")
  # scale_y_continuous(trans = "exp", breaks = c(0,.5, 1, 1.5,2),
  #                        name = "Adjusted SMR")           # removes the need for readers to exponentiate

legend_shared <- ggpubr::get_legend(
  p_rate2 + theme(legend.position = "bottom")+ guides(fill=guide_legend(title="Sex"), color=guide_legend(title="Sex"))   # basta un panel
)
panels <- plot_grid(
  p_rate2+ theme(legend.position="none"), p_sir2+ theme(legend.position="none"),
  ncol             = 1,
  labels           = c("a", "b"),
  label_size       = 14,
  label_fontfamily = "Times New Roman",
  align            = "v",   # alinea verticalmente
  axis             = "l",   # toma eje izquierdo como referencia
  label_x          = 0,     # esquina izq.
  label_y          = 1,
  hjust            = -0.1,
  vjust            = 1.2
)
  
# Etiqueta global del eje-x
xlab_shared <- ggdraw() +
  draw_label("Years since discharge",
            fontfamily = "Times New Roman",
             fontface = "plain", size = 12, hjust = 0.5)

# Figura final (ajusta rel_heights si necesitas más/menos espacio)
final_plot <- plot_grid(
  panels,
  xlab_shared,
  legend_shared,
  ncol = 1,
  rel_heights = c(1, 0.06, 0.10)   # ajusta espacio a tu gusto
)

# Mostrar o guardar
print(final_plot)
#ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/Figure_1_rates_and_SIR_by_fot.png"), dpi = 600, width = 6*.9, height = 7*.9)

figexp<- 1.5

deinflar_word <- 1/1.07653631284916

ggsave(
    paste0(gsub("/cons","",getwd()), "/cons/_figs/Figure_1_rates_and_SIR_by_fot.pdf"),
    dpi = 600,
    width = 80 *figexp,  # Target width in mm (directly from journal instructions)
    height = 80 * figexp* (7/6), # Adjust height proportionally based on your original plot ratio
    units = "mm",
    device = cairo_pdf  # This is the key to fixing the font error
)

ggsave(
    paste0(gsub("/cons","",getwd()), "/cons/_figs/Figure_1_rates_and_SIR_by_fot.png"),
    dpi = 600,
    width = 80 *figexp*deinflar_word,  # Target width in mm (directly from journal instructions)
    height = 80 * figexp*deinflar_word* (7/6), # Adjust height proportionally based on your original plot ratio
    units = "mm"#,
    #device = cairo_pdf  # This is the key to fixing the font error
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

c_SISTRAT_c1_c_fot <- lexpand( clean_df_c, 
                             status = status, 
                             birth = birth_date_rec, 
                             exit = death_date_rec, entry = disch_date_rec6,
                             #fot=0:10,
                             #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                             breaks = list(per = seq(2010, 2021, by = 1), 
                                           #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                           age = c(18, 30, 45, 60, 76), fot = c(0, .0386,.2465, .5, 1, 3, 5, 7, 9, Inf)),
                             aggre = list(agegroup = age, year = per, sex= sex_rec, fot= fot) )
sir_tot_fot_c<- popEpi::sir( coh.data = c_SISTRAT_c1_c_fot, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
                           ref.data = mx_1x1_banded, 
                           ref.rate = 'haz', 
                           print="fot",
                           adjust = c('agegroup','year','sex'), 
                           EAR=T)#Excess Absolute Risks
sir_tot_fot

sr_1_sex_fot_c <- popEpi::sir(c_SISTRAT_c1_c_fot, coh.obs = 'from0to1',
                            coh.pyrs = 'pyrs',
                            ref.data = mx_1x1_banded[ , c("sex", "year", "agegroup", "haz")],
                            ref.rate = haz,
                            print = c("sex", "fot"),
                            adjust = c("agegroup", "sex", "year"),
                            test.type = "homogeneity",
                            conf.type = "wald", #conf.type = "wald" usa la aproximación normal de Poisson (la misma lógica que explicamos antes).
                            conf.level = 0.95, EAR = T)
r2_adj_fot_c <- rate(
    data    = c_SISTRAT_c1_c_fot,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df, #weights inglm should be applied in the offset
    print= "fot"
)
r2_sex_fot_c <- rate( data = c_SISTRAT_c1_c_fot, 
                    obs = from0to1, 
                    pyrs = pyrs, 
                    print = c("sex","fot"),
                    adjust = c("year", "agegroup"),
                    weights = list(
                        year=proy_ine_reg_group_25_june_year$w, 
                        agegroup = proy_ine_reg_group_25_june_age$w 
                        #sex = proy_ine_reg_group_25_june_sex$w) 
                    )
)


rates_df_fot_c <- r2_adj_fot_c %>% 
  mutate(
    # conviene expresar la tasa por 1 000 persona-año
    rate_adj_1k     = rate.adj     * 1e3,
    rate_adj_lo_1k  = rate.adj.lo  * 1e3,
    rate_adj_hi_1k  = rate.adj.hi  * 1e3
  )
rates_sex_df_fot_c <- r2_sex_fot_c %>% 
  mutate(
    # conviene expresar la tasa por 1 000 persona-año
    rate_adj_1k     = rate.adj     * 1e3,
    rate_adj_lo_1k  = rate.adj.lo  * 1e3,
    rate_adj_hi_1k  = rate.adj.hi  * 1e3
  )

p_rate2_c <- ggplot(rates_sex_df_fot_c, aes(x = fot, y = rate_adj_1k, fill=sex)) +
  geom_ribbon(aes(ymin = rate_adj_lo_1k, ymax = rate_adj_hi_1k, fill=sex),
              alpha = .20) +
  geom_line(aes(color=sex), size = .9) +
  geom_point(size = 2) +
  scale_x_continuous("Years since discharge",
                     breaks = setdiff(round(rates_df_fot_c$fot,2),c(0.04, 0.25))) +
  scale_y_continuous("Adjusted rate \n(deaths ×1,000 PY)",
                     limits = c(0, NA)) +
  theme_minimal()+
  theme(axis.title.x = element_blank())+scale_colour_manual(
  values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
  values = c(Male = alpha("#2C3E8B", 0.25),
             Female = alpha("#E69F00", 0.25))
)

p_sir2_c <- ggplot(sr_1_sex_fot_c, aes(x = fot, y = sir, fill= sex)) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "grey50") +
  geom_ribbon(aes(ymin = sir.lo, ymax = sir.hi, fill=sex),
              alpha = .20) +
  geom_line(aes(color=sex), size = .9) +
  geom_point(size = 2) +
  scale_x_continuous("Years since discharge",
                     breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25))) +
  theme_minimal()+
  theme(axis.title.x = element_blank())+ 
  scale_y_log10(
    "Adjusted SMR",
    breaks = setdiff(round(rates_df_fot$fot,2),c(0.04, 0.25)),
    labels = scales::number_format(accuracy = 0.1)
  ) +scale_colour_manual(
  values = c(Male = "#2C3E8B", Female = "#E69F00")
) +
scale_fill_manual(
  values = c(Male = alpha("#2C3E8B", 0.25),
             Female = alpha("#E69F00", 0.25))
)+ theme(legend.position="none")
  # scale_y_continuous(trans = "exp", breaks = c(0,.5, 1, 1.5,2),
  #                        name = "Adjusted SMR")           # removes the need for readers to exponentiate

legend_shared_c <- get_legend(
  p_rate2_c + theme(legend.position = "bottom")   # basta un panel
)
  panels_c <- plot_grid(
      p_rate2_c+ theme(legend.position="none"), p_sir2_c+ theme(legend.position="none"),
      ncol  = 1,
      align = "v",
      axis  = "l"
  )
# Etiqueta global del eje-x
xlab_shared_c <- ggdraw() +
  draw_label("Years since discharge",
             fontface = "plain", size = 12, hjust = 0.5)

# Figura final (ajusta rel_heights si necesitas más/menos espacio)
final_plot_c <- plot_grid(
  panels_c,
  xlab_shared_c,
  legend_shared_c,
  ncol = 1,
  rel_heights = c(1, 0.06, 0.10)   # ajusta espacio a tu gusto
)

# Mostrar o guardar
print(final_plot_c)
ggsave(paste0(gsub("/cons","",getwd()),"/cons/_figs/rates_and_SIR_by_fot_c.png"), dpi = 600, width = 7, height = 4.5)
SIR (adjusted by agegroup, year, sex) with 95% confidence intervals (profile) 
Test for homogeneity: p < 0.001 

 Total sir: 3.59 (3.46-3.72)
 Total observed: 2996
 Total expected: 834.72
 Total person-years: 353826 


Clave <fot>
      fot observed expected      pyrs   sir sir.lo sir.hi p_value    EAR
    <num>    <num>    <num>     <num> <num>  <num>  <num>   <num>  <num>
1: 0.0000       49     5.66   2700.92  8.66   6.46  11.32       0 16.047
2: 0.0386      135    30.31  14408.71  4.45   3.74   5.25       0  7.266
3: 0.2465      135    36.67  17287.71  3.68   3.09   4.34       0  5.688
4: 0.5000      239    71.96  33364.72  3.32   2.92   3.76       0  5.006
5: 1.0000     1028   264.11 116421.81  3.89   3.66   4.14       0  6.561
6: 3.0000      696   205.71  84817.22  3.38   3.14   3.64       0  5.780
7: 5.0000      452   133.75  52809.05  3.38   3.08   3.70       0  6.026
8: 7.0000      209    69.15  26021.20  3.02   2.63   3.45       0  5.374
9: 9.0000       53    17.40   5994.64  3.05   2.30   3.94       0  5.939
Adjusted all-cause DSRs and SMRs by years since treatment discharge and sex among adults treated for substance-use disorders in Chile, 2010 – 2020

Adjusted all-cause DSRs and SMRs by years since treatment discharge and sex among adults treated for substance-use disorders in Chile, 2010 – 2020

Adjusted all-cause DSRs and SMRs by years since treatment discharge and sex among adults treated for substance-use disorders in Chile, 2010 – 2020

Adjusted all-cause DSRs and SMRs by years since treatment discharge and sex among adults treated for substance-use disorders in Chile, 2010 – 2020

Heterogeneity

Code
sep_ind_a<- 
tibble::tibble(type= "Indirect, main", raw = df_smr_ind$SMR) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)
sep_ind_b<- 
tibble::tibble(type= "Indirect, sens", raw = df_smr_ind_b$SMR) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)
sep_dir_a<- 
tibble(type= "Indirect, main", raw = df_smr_dir_a$SMR_dir) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)
sep_dir_b<- 
tibble::tibble(type= "Indirect, sens", raw = df_smr_dir_b$SMR_dir) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)

sep_ind_c<- 
tibble::tibble(type= "Indirect, main", raw = df_smr_ind_c$SMR) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)
sep_dir_c<- 
tibble::tibble(type= "Indirect, sens", raw = df_smr_dir_c$SMR_dir) %>%
    extract(
      raw,
      into   = c("estimate", "lower", "upper"),
      regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
      convert = TRUE            # convierte a numérico
  )|>
  dplyr::select(type, estimate, lower, upper)

cat("SMRs, indirect, main")

cat("Between sex\n")
variances_ind_a_sex <- ((log(sep_ind_a$upper[2:3]) - log(sep_ind_a$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_a_fe_sex <- rma(yi = log(sep_ind_a$estimate[2:3]), sei = sqrt(variances_ind_a_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_a_fe_sex

cat("Between ages\n")
variances_ind_a_agegroup <- ((log(sep_ind_a$upper[4:7]) - log(sep_ind_a$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_a_fe_agegroup <- rma(yi = log(sep_ind_a$estimate[4:7]), sei = sqrt(variances_ind_a_agegroup), method = "FE")
meta_fe_ind_a_fe_agegroup

cat("Between settings\n")
variances_ind_a_setting <- ((log(sep_ind_a$upper[8:9]) - log(sep_ind_a$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_a_fe_setting <- rma(yi = log(sep_ind_a$estimate[8:9]), sei = sqrt(variances_ind_a_setting), method = "FE")
meta_fe_ind_a_fe_setting

cat("Between primary substance\n")
variances_ind_a_licit <- ((log(sep_ind_a$upper[10:11]) - log(sep_ind_a$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_a_fe_licit <- rma(yi = log(sep_ind_a$estimate[10:11]), sei = sqrt(variances_ind_a_licit), method = "FE")
meta_fe_ind_a_fe_licit

cat("Between completed and non-completed treatments\n")
variances_ind_a_comp <- ((log(sep_ind_a$upper[12:13]) - log(sep_ind_a$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_a_fe_comp <- rma(yi = log(sep_ind_a$estimate[12:13]), sei = sqrt(variances_ind_a_comp), method = "FE")
meta_fe_ind_a_fe_comp

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

cat("SMRs, indirect, main")

cat("Between sex\n")
variances_dir_a_sex <- ((log(sep_dir_a$upper[2:3]) - log(sep_dir_a$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_a_fe_sex <- rma(yi = log(sep_dir_a$estimate[2:3]), sei = sqrt(variances_dir_a_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_a_fe_sex

cat("Between ages\n")
variances_dir_a_agegroup <- ((log(sep_dir_a$upper[4:7]) - log(sep_dir_a$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_a_fe_agegroup <- rma(yi = log(sep_dir_a$estimate[4:7]), sei = sqrt(variances_dir_a_agegroup), method = "FE")
meta_fe_dir_a_fe_agegroup

cat("Between settings\n")
variances_dir_a_setting <- ((log(sep_dir_a$upper[8:9]) - log(sep_dir_a$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_a_fe_setting <- rma(yi = log(sep_dir_a$estimate[8:9]), sei = sqrt(variances_dir_a_setting), method = "FE")
meta_fe_dir_a_fe_setting

cat("Between primary substance\n")
variances_dir_a_licit <- ((log(sep_dir_a$upper[10:11]) - log(sep_dir_a$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_a_fe_licit <- rma(yi = log(sep_dir_a$estimate[10:11]), sei = sqrt(variances_dir_a_licit), method = "FE")
meta_fe_dir_a_fe_licit

cat("Between completed and non-completed treatments\n")
variances_dir_a_comp <- ((log(sep_dir_a$upper[12:13]) - log(sep_dir_a$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_a_fe_comp <- rma(yi = log(sep_dir_a$estimate[12:13]), sei = sqrt(variances_dir_a_comp), method = "FE")
meta_fe_dir_a_fe_comp


bind_rows(
cbind.data.frame(type= "Main", comp= "Sex", Q= meta_fe_ind_a_fe_sex$QE, p= meta_fe_ind_a_fe_sex$QEp, Q_b= meta_fe_dir_a_fe_sex$QE, p_b= meta_fe_dir_a_fe_sex$QEp),
cbind.data.frame(type= "Main", comp= "Age groups", Q= meta_fe_ind_a_fe_agegroup$QE, p= meta_fe_ind_a_fe_agegroup$QEp, Q_b= meta_fe_dir_a_fe_agegroup$QE, p_b= meta_fe_dir_a_fe_agegroup$QEp),
cbind.data.frame(type= "Main", comp= "Setting", Q= meta_fe_ind_a_fe_setting$QE, p= meta_fe_ind_a_fe_setting$QEp, Q_b= meta_fe_dir_a_fe_setting$QE, p_b= meta_fe_dir_a_fe_setting$QEp),
cbind.data.frame(type= "Main", comp= "Primary substance", Q= meta_fe_ind_a_fe_licit$QE, p= meta_fe_ind_a_fe_licit$QEp, Q_b= meta_fe_dir_a_fe_licit$QE, p_b= meta_fe_dir_a_fe_licit$QEp),
cbind.data.frame(type= "Main", comp= "Tr. compliance status", Q= meta_fe_ind_a_fe_comp$QE, p= meta_fe_ind_a_fe_comp$QEp, Q_b= meta_fe_dir_a_fe_comp$QE, p_b= meta_fe_dir_a_fe_comp$QEp)
)|> 
    mutate(
    Qa_SMR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
    ),
    Qa_DSR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
    )
  ) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR) |> 
  knitr::kable("markdown", caption= "Heterogeneity, main")
SMRs, indirect, mainBetween sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.85%
H^2 (total variability / sampling variability):  86.82

Test for Heterogeneity:
Q(df = 1) = 86.8157, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3116  0.0228  57.6099  <.0001  1.2670  1.3563  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   63.85%
H^2 (total variability / sampling variability):  2.77

Test for Heterogeneity:
Q(df = 3) = 8.2993, p-val = 0.0402

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.2487  0.0291  42.9598  <.0001  1.1917  1.3056  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.03%
H^2 (total variability / sampling variability):  50.74

Test for Heterogeneity:
Q(df = 1) = 50.7425, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3022  0.0193  67.5948  <.0001  1.2645  1.3400  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   99.38%
H^2 (total variability / sampling variability):  161.43

Test for Heterogeneity:
Q(df = 1) = 161.4288, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3842  0.0206  67.1615  <.0001  1.3438  1.4246  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.38%
H^2 (total variability / sampling variability):  61.83

Test for Heterogeneity:
Q(df = 1) = 61.8300, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.2704  0.0191  66.4657  <.0001  1.2329  1.3078  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

SMRs, indirect, mainBetween sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   63.81%
H^2 (total variability / sampling variability):  2.76

Test for Heterogeneity:
Q(df = 1) = 2.7632, p-val = 0.0965

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.2314  0.0801  27.8706  <.0001  2.0745  2.3884  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   99.31%
H^2 (total variability / sampling variability):  144.23

Test for Heterogeneity:
Q(df = 3) = 432.6794, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.4121  0.0263  91.8591  <.0001  2.3606  2.4635  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   23.71%
H^2 (total variability / sampling variability):  1.31

Test for Heterogeneity:
Q(df = 1) = 1.3108, p-val = 0.2522

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.3603  0.0617  38.2425  <.0001  2.2393  2.4813  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   0.00%
H^2 (total variability / sampling variability):  0.16

Test for Heterogeneity:
Q(df = 1) = 0.1567, p-val = 0.6922

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.6820  0.0650  41.2571  <.0001  2.5546  2.8094  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   88.61%
H^2 (total variability / sampling variability):  8.78

Test for Heterogeneity:
Q(df = 1) = 8.7810, p-val = 0.0030

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.1996  0.0847  25.9775  <.0001  2.0336  2.3655  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Heterogeneity, main
type comp Qa_SMR Qa_DSR
Main Sex Q 86.82 (df=1), p=0.000 Q 2.76 (df=1), p=0.096
Main Age groups Q 8.30 (df=3), p=0.040 Q 432.68 (df=3), p=0.000
Main Setting Q 50.74 (df=1), p=0.000 Q 1.31 (df=1), p=0.252
Main Primary substance Q 161.43 (df=1), p=0.000 Q 0.16 (df=1), p=0.692
Main Tr. compliance status Q 61.83 (df=1), p=0.000 Q 8.78 (df=1), p=0.003
Code
rbind.data.frame(
  tibble(
  group1     = "SMR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
),
  pairwise_smr_test(smrs= sep_ind_a$estimate[4:7], lowers= sep_ind_a$lower[4:7], uppers= sep_ind_a$upper[4:7], a=.1)|> 
  mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
 tibble(
  group1     = "DSR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
), 
pairwise_smr_test(smrs= sep_dir_a$estimate[4:7], lowers= sep_dir_a$lower[4:7], uppers= sep_dir_a$upper[4:7], a=.1)|>
   mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
  dplyr::select(-significance) |> 
  rename("estimate1"="smr1", "estimate2"="smr2")|> 
  mutate(across(c("se_diff", "z"),~round(.,2))) |> 
  knitr::kable("markdown", caption="Pairwise comparison, age groups")
Pairwise comparison, age groups
group1 group2 estimate1 estimate2 difference se_diff z p_unadj p_holm
SMR
18-29 30-44 3.438224 3.876232 -0.438008 0.50 0.88 0.3786415 1.0000000
18-29 45-59 3.438224 3.588361 -0.150137 0.43 0.35 0.7280816 1.0000000
18-29 60+ 3.438224 3.010031 0.428193 0.45 0.95 0.3407015 1.0000000
30-44 45-59 3.876232 3.588361 0.287871 0.32 0.91 0.3625232 1.0000000
30-44 60+ 3.876232 3.010031 0.866201 0.34 2.55 0.0107925 0.0647547
45-59 60+ 3.588361 3.010031 0.578330 0.23 2.48 0.0131587 0.0657933
DSR
18-29 30-44 2.883111 5.683244 -2.800133 0.60 4.63 0.0000037 0.0000146
18-29 45-59 2.883111 14.226041 -11.342930 0.47 24.02 0.0000000 0.0000000
18-29 60+ 2.883111 46.704337 -43.821226 20.84 2.10 0.0354917 0.1064750
30-44 45-59 5.683244 14.226041 -8.542797 0.70 12.23 0.0000000 0.0000000
30-44 60+ 5.683244 46.704337 -41.021093 20.85 1.97 0.0490975 0.1064750
45-59 60+ 14.226041 46.704337 -32.478296 20.84 1.56 0.1191836 0.1191836
Code
cat("SMRs, indirect, sens")

cat("Between sex\n")
variances_ind_b_sex <- ((log(sep_ind_b$upper[2:3]) - log(sep_ind_b$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_b_fe_sex <- rma(yi = log(sep_ind_b$estimate[2:3]), sei = sqrt(variances_ind_b_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_b_fe_sex

cat("Between ages\n")
variances_ind_b_agegroup <- ((log(sep_ind_b$upper[4:7]) - log(sep_ind_b$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_b_fe_agegroup <- rma(yi = log(sep_ind_b$estimate[4:7]), sei = sqrt(variances_ind_b_agegroup), method = "FE")
meta_fe_ind_b_fe_agegroup

cat("Between settings\n")
variances_ind_b_setting <- ((log(sep_ind_b$upper[8:9]) - log(sep_ind_b$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_b_fe_setting <- rma(yi = log(sep_ind_b$estimate[8:9]), sei = sqrt(variances_ind_b_setting), method = "FE")
meta_fe_ind_b_fe_setting

cat("Between primary substance\n")
variances_ind_b_licit <- ((log(sep_ind_b$upper[10:11]) - log(sep_ind_b$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_b_fe_licit <- rma(yi = log(sep_ind_b$estimate[10:11]), sei = sqrt(variances_ind_b_licit), method = "FE")
meta_fe_ind_b_fe_licit

cat("Between completed and non-completed treatments\n")
variances_ind_b_comp <- ((log(sep_ind_b$upper[12:13]) - log(sep_ind_b$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_b_fe_comp <- rma(yi = log(sep_ind_b$estimate[12:13]), sei = sqrt(variances_ind_b_comp), method = "FE")
meta_fe_ind_b_fe_comp

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("SMRs, indirect, sens")

cat("Between sex\n")
variances_dir_b_sex <- ((log(sep_dir_b$upper[2:3]) - log(sep_dir_b$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_b_fe_sex <- rma(yi = log(sep_dir_b$estimate[2:3]), sei = sqrt(variances_dir_b_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_b_fe_sex

cat("Between ages\n")
variances_dir_b_agegroup <- ((log(sep_dir_b$upper[4:7]) - log(sep_dir_b$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_b_fe_agegroup <- rma(yi = log(sep_dir_b$estimate[4:7]), sei = sqrt(variances_dir_b_agegroup), method = "FE")
meta_fe_dir_b_fe_agegroup

cat("Between settings\n")
variances_dir_b_setting <- ((log(sep_dir_b$upper[8:9]) - log(sep_dir_b$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_b_fe_setting <- rma(yi = log(sep_dir_b$estimate[8:9]), sei = sqrt(variances_dir_b_setting), method = "FE")
meta_fe_dir_b_fe_setting

cat("Between primary substance\n")
variances_dir_b_licit <- ((log(sep_dir_b$upper[10:11]) - log(sep_dir_b$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_b_fe_licit <- rma(yi = log(sep_dir_b$estimate[10:11]), sei = sqrt(variances_dir_b_licit), method = "FE")
meta_fe_dir_b_fe_licit

cat("Between completed and non-completed treatments\n")
variances_dir_b_comp <- ((log(sep_dir_b$upper[12:13]) - log(sep_dir_b$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_b_fe_comp <- rma(yi = log(sep_dir_b$estimate[12:13]), sei = sqrt(variances_dir_b_comp), method = "FE")
meta_fe_dir_b_fe_comp

bind_rows(
cbind.data.frame(type= "Sensitivity", comp= "Sex", Q= meta_fe_ind_b_fe_sex$QE, p= meta_fe_ind_b_fe_sex$QEp, Q_b= meta_fe_dir_b_fe_sex$QE, p_b= meta_fe_dir_b_fe_sex$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Age groups", Q= meta_fe_ind_b_fe_agegroup$QE, p= meta_fe_ind_b_fe_agegroup$QEp, Q_b= meta_fe_dir_b_fe_agegroup$QE, p_b= meta_fe_dir_b_fe_agegroup$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Setting", Q= meta_fe_ind_b_fe_setting$QE, p= meta_fe_ind_b_fe_setting$QEp, Q_b= meta_fe_dir_b_fe_setting$QE, p_b= meta_fe_dir_b_fe_setting$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Primary substance", Q= meta_fe_ind_b_fe_licit$QE, p= meta_fe_ind_b_fe_licit$QEp, Q_b= meta_fe_dir_b_fe_licit$QE, p_b= meta_fe_dir_b_fe_licit$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Tr. compliance status", Q= meta_fe_ind_b_fe_comp$QE, p= meta_fe_ind_b_fe_comp$QEp, Q_b= meta_fe_dir_b_fe_comp$QE, p_b= meta_fe_dir_b_fe_comp$QEp)
) |> 
    mutate(
    Qa_SMR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
    ),
    Qa_DSR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
    )
  ) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR)|> 
  knitr::kable("markdown", caption= "Heterogeneity, sensitivity")
SMRs, indirect, sensBetween sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.47%
H^2 (total variability / sampling variability):  65.16

Test for Heterogeneity:
Q(df = 1) = 65.1585, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.4313  0.0278  51.4241  <.0001  1.3767  1.4858  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   78.17%
H^2 (total variability / sampling variability):  4.58

Test for Heterogeneity:
Q(df = 3) = 13.7443, p-val = 0.0033

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3271  0.0318  41.7670  <.0001  1.2648  1.3894  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.82%
H^2 (total variability / sampling variability):  85.06

Test for Heterogeneity:
Q(df = 1) = 85.0625, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.4163  0.0188  75.3034  <.0001  1.3795  1.4532  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   99.39%
H^2 (total variability / sampling variability):  164.66

Test for Heterogeneity:
Q(df = 1) = 164.6616, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.4606  0.0214  68.1468  <.0001  1.4186  1.5026  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.92%
H^2 (total variability / sampling variability):  92.71

Test for Heterogeneity:
Q(df = 1) = 92.7138, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3392  0.0185  72.3847  <.0001  1.3029  1.3754  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

SMRs, indirect, sensBetween sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   63.59%
H^2 (total variability / sampling variability):  2.75

Test for Heterogeneity:
Q(df = 1) = 2.7462, p-val = 0.0975

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.3506  0.0768  30.6158  <.0001  2.2001  2.5010  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   98.98%
H^2 (total variability / sampling variability):  97.99

Test for Heterogeneity:
Q(df = 3) = 293.9785, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.5801  0.0298  86.5100  <.0001  2.5216  2.6385  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   0.00%
H^2 (total variability / sampling variability):  0.58

Test for Heterogeneity:
Q(df = 1) = 0.5794, p-val = 0.4465

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.5550  0.0639  39.9919  <.0001  2.4298  2.6803  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   0.00%
H^2 (total variability / sampling variability):  0.05

Test for Heterogeneity:
Q(df = 1) = 0.0479, p-val = 0.8267

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.8375  0.0782  36.2628  <.0001  2.6842  2.9909  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   87.48%
H^2 (total variability / sampling variability):  7.99

Test for Heterogeneity:
Q(df = 1) = 7.9866, p-val = 0.0047

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.3194  0.0854  27.1439  <.0001  2.1519  2.4868  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Heterogeneity, sensitivity
type comp Qa_SMR Qa_DSR
Sensitivity Sex Q 65.16 (df=1), p=0.000 Q 2.75 (df=1), p=0.097
Sensitivity Age groups Q 13.74 (df=3), p=0.003 Q 293.98 (df=3), p=0.000
Sensitivity Setting Q 85.06 (df=1), p=0.000 Q 0.58 (df=1), p=0.447
Sensitivity Primary substance Q 164.66 (df=1), p=0.000 Q 0.05 (df=1), p=0.827
Sensitivity Tr. compliance status Q 92.71 (df=1), p=0.000 Q 7.99 (df=1), p=0.005
Code
rbind.data.frame(
  tibble(
  group1     = "SMR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
),
  pairwise_smr_test(smrs= sep_ind_b$estimate[4:7], lowers= sep_ind_b$lower[4:7], uppers= sep_ind_b$upper[4:7], a=.1)|> 
  mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
 tibble(
  group1     = "DSR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
), 
pairwise_smr_test(smrs= sep_dir_b$estimate[4:7], lowers= sep_dir_b$lower[4:7], uppers= sep_dir_b$upper[4:7], a=.1)|>
   mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
  dplyr::select(-significance) |> 
  rename("estimate1"="smr1", "estimate2"="smr2")|>
  mutate(across(c("se_diff", "z"),~round(.,2))) |> 
  knitr::kable("markdown", caption="Pairwise comparison, age groups, sensitivity")
Pairwise comparison, age groups, sensitivity
group1 group2 estimate1 estimate2 difference se_diff z p_unadj p_holm
SMR
18-29 30-44 3.897919 4.421679 -0.523760 0.61 0.86 0.3884133 0.7768265
18-29 45-59 3.897919 3.904502 -0.006583 0.53 0.01 0.9901223 0.9901223
18-29 60+ 3.897919 3.128950 0.768969 0.54 1.42 0.1545615 0.6182458
30-44 45-59 4.421679 3.904502 0.517177 0.38 1.35 0.1761901 0.6182458
30-44 60+ 4.421679 3.128950 1.292729 0.39 3.28 0.0010339 0.0062031
45-59 60+ 3.904502 3.128950 0.775552 0.26 2.95 0.0031980 0.0159899
DSR
18-29 30-44 3.634236 7.255798 -3.621562 0.96 3.76 0.0001707 0.0006827
18-29 45-59 3.634236 16.356597 -12.722361 0.63 20.30 0.0000000 0.0000000
18-29 60+ 3.634236 53.325194 -49.690958 23.88 2.08 0.0374068 0.1122205
30-44 45-59 7.255798 16.356597 -9.100799 1.06 8.62 0.0000000 0.0000000
30-44 60+ 7.255798 53.325194 -46.069396 23.89 1.93 0.0538073 0.1122205
45-59 60+ 16.356597 53.325194 -36.968597 23.88 1.55 0.1215820 0.1215820
Code
cat("SMRs, indirect, sensitivy (2)")

cat("Between sex\n")
variances_ind_c_sex <- ((log(sep_ind_c$upper[2:3]) - log(sep_ind_c$estimate[2:3])) / qnorm(0.975))^2
meta_fe_ind_c_fe_sex <- rma(yi = log(sep_ind_c$estimate[2:3]), sei = sqrt(variances_ind_c_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_ind_c_fe_sex

cat("Between ages\n")
variances_ind_c_agegroup <- ((log(sep_ind_c$upper[4:7]) - log(sep_ind_c$estimate[4:7])) / qnorm(0.975))^2
meta_fe_ind_c_fe_agegroup <- rma(yi = log(sep_ind_c$estimate[4:7]), sei = sqrt(variances_ind_c_agegroup), method = "FE")
meta_fe_ind_c_fe_agegroup

cat("Between settings\n")
variances_ind_c_setting <- ((log(sep_ind_c$upper[8:9]) - log(sep_ind_c$estimate[8:9])) / qnorm(0.975))^2
meta_fe_ind_c_fe_setting <- rma(yi = log(sep_ind_c$estimate[8:9]), sei = sqrt(variances_ind_c_setting), method = "FE")
meta_fe_ind_c_fe_setting

cat("Between primary substance\n")
variances_ind_c_licit <- ((log(sep_ind_c$upper[10:11]) - log(sep_ind_c$estimate[10:11])) / qnorm(0.975))^2
meta_fe_ind_c_fe_licit <- rma(yi = log(sep_ind_c$estimate[10:11]), sei = sqrt(variances_ind_c_licit), method = "FE")
meta_fe_ind_c_fe_licit

cat("Between completed and non-completed treatments\n")
variances_ind_c_comp <- ((log(sep_ind_c$upper[12:13]) - log(sep_ind_c$estimate[12:13])) / qnorm(0.975))^2
meta_fe_ind_c_fe_comp <- rma(yi = log(sep_ind_c$estimate[12:13]), sei = sqrt(variances_ind_c_comp), method = "FE")
meta_fe_ind_c_fe_comp

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("SMRs, indirect, sens")

cat("Between sex\n")
variances_dir_c_sex <- ((log(sep_dir_c$upper[2:3]) - log(sep_dir_c$estimate[2:3])) / qnorm(0.975))^2
meta_fe_dir_c_fe_sex <- rma(yi = log(sep_dir_c$estimate[2:3]), sei = sqrt(variances_dir_c_sex), method = "FE")
#hetero(3.65, 3.41, 3.88, 6.03, 5.41, 6.66) #the same
meta_fe_dir_c_fe_sex

cat("Between ages\n")
variances_dir_c_agegroup <- ((log(sep_dir_c$upper[4:7]) - log(sep_dir_c$estimate[4:7])) / qnorm(0.975))^2
meta_fe_dir_c_fe_agegroup <- rma(yi = log(sep_dir_c$estimate[4:7]), sei = sqrt(variances_dir_c_agegroup), method = "FE")
meta_fe_dir_c_fe_agegroup

cat("Between settings\n")
variances_dir_c_setting <- ((log(sep_dir_c$upper[8:9]) - log(sep_dir_c$estimate[8:9])) / qnorm(0.975))^2
meta_fe_dir_c_fe_setting <- rma(yi = log(sep_dir_c$estimate[8:9]), sei = sqrt(variances_dir_c_setting), method = "FE")
meta_fe_dir_c_fe_setting

cat("Between primary substance\n")
variances_dir_c_licit <- ((log(sep_dir_c$upper[10:11]) - log(sep_dir_c$estimate[10:11])) / qnorm(0.975))^2
meta_fe_dir_c_fe_licit <- rma(yi = log(sep_dir_c$estimate[10:11]), sei = sqrt(variances_dir_c_licit), method = "FE")
meta_fe_dir_c_fe_licit

cat("Between completed and non-completed treatments\n")
variances_dir_c_comp <- ((log(sep_dir_c$upper[12:13]) - log(sep_dir_c$estimate[12:13])) / qnorm(0.975))^2
meta_fe_dir_c_fe_comp <- rma(yi = log(sep_dir_c$estimate[12:13]), sei = sqrt(variances_dir_c_comp), method = "FE")
meta_fe_dir_c_fe_comp

bind_rows(
cbind.data.frame(type= "Sensitivity", comp= "Sex", Q= meta_fe_ind_c_fe_sex$QE, p= meta_fe_ind_c_fe_sex$QEp, Q_b= meta_fe_dir_c_fe_sex$QE, p_b= meta_fe_dir_c_fe_sex$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Age groups", Q= meta_fe_ind_c_fe_agegroup$QE, p= meta_fe_ind_c_fe_agegroup$QEp, Q_b= meta_fe_dir_c_fe_agegroup$QE, p_b= meta_fe_dir_c_fe_agegroup$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Setting", Q= meta_fe_ind_c_fe_setting$QE, p= meta_fe_ind_c_fe_setting$QEp, Q_b= meta_fe_dir_c_fe_setting$QE, p_b= meta_fe_dir_c_fe_setting$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Primary substance", Q= meta_fe_ind_c_fe_licit$QE, p= meta_fe_ind_c_fe_licit$QEp, Q_b= meta_fe_dir_c_fe_licit$QE, p_b= meta_fe_dir_c_fe_licit$QEp),
cbind.data.frame(type= "Sensitivity", comp= "Tr. compliance status", Q= meta_fe_ind_c_fe_comp$QE, p= meta_fe_ind_c_fe_comp$QEp, Q_b= meta_fe_dir_c_fe_comp$QE, p_b= meta_fe_dir_c_fe_comp$QEp)
) |> 
    mutate(
    Qa_SMR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
    ),
    Qa_DSR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
    )
  ) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR)|> 
  knitr::kable("markdown", caption= "Heterogeneity, sensitivity (2)")
SMRs, indirect, sensitivy (2)Between sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   99.04%
H^2 (total variability / sampling variability):  104.38

Test for Heterogeneity:
Q(df = 1) = 104.3827, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3917  0.0196  71.1303  <.0001  1.3533  1.4300  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   70.61%
H^2 (total variability / sampling variability):  3.40

Test for Heterogeneity:
Q(df = 3) = 10.2080, p-val = 0.0169

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3232  0.0276  47.9016  <.0001  1.2690  1.3773  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   98.24%
H^2 (total variability / sampling variability):  56.78

Test for Heterogeneity:
Q(df = 1) = 56.7822, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3843  0.0164  84.4820  <.0001  1.3522  1.4164  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   99.61%
H^2 (total variability / sampling variability):  259.18

Test for Heterogeneity:
Q(df = 1) = 259.1790, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.4912  0.0175  85.3448  <.0001  1.4569  1.5254  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   95.71%
H^2 (total variability / sampling variability):  23.32

Test for Heterogeneity:
Q(df = 1) = 23.3207, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  1.3591  0.0157  86.8143  <.0001  1.3284  1.3897  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

SMRs, indirect, sensBetween sex

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   90.96%
H^2 (total variability / sampling variability):  11.06

Test for Heterogeneity:
Q(df = 1) = 11.0628, p-val = 0.0009

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.3149  0.0550  42.0836  <.0001  2.2071  2.4227  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between ages

Fixed-Effects Model (k = 4)

I^2 (total heterogeneity / total variability):   99.47%
H^2 (total variability / sampling variability):  188.41

Test for Heterogeneity:
Q(df = 3) = 565.2305, p-val < .0001

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.4198  0.0273  88.7556  <.0001  2.3664  2.4733  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between settings

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   0.00%
H^2 (total variability / sampling variability):  0.46

Test for Heterogeneity:
Q(df = 1) = 0.4556, p-val = 0.4997

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.5200  0.0596  42.3143  <.0001  2.4032  2.6367  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between primary substance

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   0.00%
H^2 (total variability / sampling variability):  0.82

Test for Heterogeneity:
Q(df = 1) = 0.8246, p-val = 0.3638

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.8037  0.0497  56.4004  <.0001  2.7063  2.9012  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Between completed and non-completed treatments

Fixed-Effects Model (k = 2)

I^2 (total heterogeneity / total variability):   86.16%
H^2 (total variability / sampling variability):  7.23

Test for Heterogeneity:
Q(df = 1) = 7.2264, p-val = 0.0072

Model Results:

estimate      se     zval    pval   ci.lb   ci.ub      
  2.3429  0.0472  49.6303  <.0001  2.2504  2.4354  *** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Heterogeneity, sensitivity (2)
type comp Qa_SMR Qa_DSR
Sensitivity Sex Q 104.38 (df=1), p=0.000 Q 11.06 (df=1), p=0.001
Sensitivity Age groups Q 10.21 (df=3), p=0.017 Q 565.23 (df=3), p=0.000
Sensitivity Setting Q 56.78 (df=1), p=0.000 Q 0.46 (df=1), p=0.500
Sensitivity Primary substance Q 259.18 (df=1), p=0.000 Q 0.82 (df=1), p=0.364
Sensitivity Tr. compliance status Q 23.32 (df=1), p=0.000 Q 7.23 (df=1), p=0.007
Code
rbind.data.frame(
  tibble(
  group1     = "SMR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
),
  pairwise_smr_test(smrs= sep_ind_c$estimate[4:7], lowers= sep_ind_c$lower[4:7], uppers= sep_ind_c$upper[4:7], a=.1)|> 
  mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+")),
 tibble(
  group1     = "DSR",
  group2     = NA_character_,
  smr1       = NA_real_,
  smr2       = NA_real_,
  difference = NA_real_,
  se_diff    = NA_real_,
  z          = NA_real_,
  p_unadj    = NA_real_,
  p_holm     = NA_real_,
  significance= NA_character_
), 
pairwise_smr_test(smrs= sep_dir_c$estimate[4:7], lowers= sep_dir_c$lower[4:7], uppers= sep_dir_c$upper[4:7], a=.1)|>
   mutate(group1= case_when(grepl("1$",group1)~"18-29",grepl("2$",group1)~"30-44",grepl("3$",group1)~"45-59",T~"60+"))|>
  mutate(group2= case_when(grepl("1$",group2)~"18-29",grepl("2$",group2)~"30-44",grepl("3$",group2)~"45-59",T~"60+"))
)|>
  dplyr::select(-significance) |> 
  rename("estimate1"="smr1", "estimate2"="smr2")|>
  mutate(across(c("se_diff", "z"),~round(.,2))) |> 
  knitr::kable("markdown", caption="Pairwise comparison, age groups, sensitivity")
Pairwise comparison, age groups, sensitivity
group1 group2 estimate1 estimate2 difference se_diff z p_unadj p_holm
SMR
18-29 30-44 3.671246 4.231312 -0.560066 0.47 1.19 0.2359458 0.9437833
18-29 45-59 3.671246 3.942439 -0.271193 0.42 0.65 0.5165100 1.0000000
18-29 60+ 3.671246 3.361856 0.309390 0.41 0.75 0.4527175 1.0000000
30-44 45-59 4.231312 3.942439 0.288873 0.33 0.88 0.3789381 1.0000000
30-44 60+ 4.231312 3.361856 0.869456 0.32 2.71 0.0066953 0.0401718
45-59 60+ 3.942439 3.361856 0.580583 0.23 2.49 0.0126800 0.0634000
DSR
18-29 30-44 2.942963 6.180656 -3.237693 0.45 7.17 0.0000000 0.0000000
18-29 45-59 2.942963 16.423330 -13.480367 0.60 22.64 0.0000000 0.0000000
18-29 60+ 2.942963 37.547356 -34.604393 6.86 5.04 0.0000005 0.0000014
30-44 45-59 6.180656 16.423330 -10.242674 0.67 15.20 0.0000000 0.0000000
30-44 60+ 6.180656 37.547356 -31.366700 6.87 4.57 0.0000050 0.0000099
45-59 60+ 16.423330 37.547356 -21.124026 6.88 3.07 0.0021399 0.0021399

Kaplan-Meier

Code
clean_df_corr_surv_km<- 
clean_df_corr_surv|> 
    mutate(agegroup = cut(
        disch_age_rec,                   # la variable de edad
        breaks = c(18, 30, 45, 60, 76), # límites (incluye 15, excluye 65)
        right  = FALSE,                 # intervalo izquierdo cerrado  [15–30)
        labels = c("18-29", "30-44", "45-59", "60+"),
        include.lowest = TRUE           # 15 entra en el primer tramo
    ), 
    #year= lubridate::year(as.Date(exit_date))
    year = lubridate::year(as.Date(disch_date_num_rec6))  # USE DISCHARGE YEAR
    )|> 
    filter(disch_age_rec>17, disch_age_rec<76)

##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:

km_fit <- survfit(Surv(pyrs, status==1) ~ 1, data = clean_df_corr_surv_km)

m_data_m <- data.frame(
  time = km_fit$time,
  surv = km_fit$surv,
  upper = km_fit$upper,
  lower = km_fit$lower
  #strata = rep(c("6623, Primer mes, TSM y Comorbilidad(2)","6612, Primer mes TUS(1)"), km_fit$strata)
)

##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:

km_fit_age <- survfit(Surv(pyrs, status==1) ~ strata(agegroup), data = clean_df_corr_surv_km)

m_data_age <- data.frame(
  time = km_fit_age$time,
  surv = km_fit_age$surv,
  upper = km_fit_age$upper,
  lower = km_fit_age$lower,
  strata = rep(c("18-29","30-44", "45-59", "60+"), km_fit_age$strata)
)
age_km<-
ggplot(m_data_age, aes(x = time, y = surv, color = strata)) +
  geom_step(size = 1.2) +  # Curvas de supervivencia
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = strata), alpha = 0.2, color = NA) +  # Intervalos de confianza
  labs(
    x = "Time (years)",
    y = "Survival probability",
    color = "Strata",
    fill = "Strata"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")+
  scale_color_manual(values = gray.colors(4, start = 0, end = 0.8)) +
  scale_fill_manual(values  = gray.colors(4, start = 0, end = 0.8)) +
  ylim(c(0.5,1))
  # scale_color_manual(values = c("#E2725B", "#D2B48C")) +  # Colores para las curvas
  # scale_fill_manual(values = c("#E2725B", "#D2B48C"))    # Colores para las áreas sombreadas

##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:
##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:


km_fit_sex <- survfit(Surv(pyrs, status==1) ~ strata(sex_rec), data = clean_df_corr_surv_km)

m_data_sex <- data.frame(
  time = km_fit_sex$time,
  surv = km_fit_sex$surv,
  upper = km_fit_sex$upper,
  lower = km_fit_sex$lower,
  strata = rep(c("Male", "Female"), km_fit_sex$strata)
)

sex_km<- 
ggplot(m_data_sex, aes(x = time, y = surv, color = strata)) +
  geom_step(size = 1.2) +  # Curvas de supervivencia
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = strata), alpha = 0.2, color = NA) +  # Intervalos de confianza
  labs(
    x = "Time (years)",
    y = "Survival probability",
    color = "Strata",
    fill = "Strata"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")+
  scale_color_manual(values = gray.colors(2, start = 0, end = 0.8)) +
  scale_fill_manual(values  = gray.colors(2, start = 0, end = 0.8)) +
  ylim(c(0.5,1))


plot_grid(age_km, 
          sex_km+ labs(x=NULL)+ theme(axis.text.y = element_blank()), ncol = 2)

ggsave(paste0(getwd(),"/_figs/kaplanmeier_age_sex.png"), dpi = 600, width = 9)
Kaplan meier curves

Kaplan meier curves

Net survival

Code
#This article shows that if the follow-up of the cohort is less than 10 years, 
#any of these methods should give similar results. However, the Hakulinen method is preferred, 
#since it accounts for heterogeneity due to potential withdrawals.
#https://scielo.isciii.es/scielo.php?script=sci_arttext&pid=S0213-91112006000400012



cat("Discharge age and calendar year\n")
clean_df_corr_surv_km$disch_age <-  with(clean_df_corr_surv_km, time_length(interval(birth_date_rec, disch_date_rec6), unit="year"))
clean_df_corr_surv_km$disch_yr<- as.integer(clean_df_corr_surv_km$yr_fr_disch_date)

clean_df_corr_surv_km[, c("follow_up_days","status", "disch_age", "disch_yr", "sex_rec", "agegroup", "res_plan",  "tr_compliance_status", "prim_sub_licit")]|>  rio::export(paste0(gsub("/cons","",getwd()), "/clean_df_corr_surv_km.csv"))

modSR5_SC <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ 1, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
surv_data5_sc <- data.frame(
  time   = modSR5_SC$time,
  surv   = modSR5_SC$surv,
  lower  = modSR5_SC$lower,
  upper  = modSR5_SC$upper
  # Hay que repetir el nombre de estrato la cantidad de filas que le corresponden
  #strata = rep(names(modSR5_SC$strata), modSR5_SC$strata)
) 
sn_5_total<- summary(modSR5_SC, times = c(5)*365.241)
sn_10_total<- summary(modSR5_SC, times = c(10)*365.241)

ggplot() +
  geom_step(data= m_data_m, aes(x = time, y = surv), linewidth = 1, linetype="dashed") +  # Curva de supervivencia
  geom_step(data= surv_data5_sc, aes(x = time/365.25, y = surv), linewidth = 1) +  # Curva de supervivencia
  geom_ribbon(data= surv_data5_sc, aes(x = time/365.25, y = surv, ymin = lower, ymax = upper), fill= "black", alpha = 0.2) +  # Intervalos de confianza
  labs(
    x = "Time since discharge (years)",
    y = "Relative survival",
  ) +
  coord_cartesian(xlim = c(0, 10), ylim= c(0.6, 1.10)) +
  geom_hline(yintercept = 1, linewidth = 2, linetype = "dashed", color = "gray") +
  #theme_minimal()  + 
  theme_sjPlot_manual()+
  theme(
    plot.title = element_text(size = 20),       # Tamaño del título del gráfico
    axis.title.x = element_text(size = 16),    # Tamaño del título del eje x
    axis.title.y = element_text(size = 16),    # Tamaño del título del eje y
    axis.text.x = element_text(size = 14),     # Tamaño de las etiquetas del eje x
    axis.text.y = element_text(size = 14)      # Tamaño de las etiquetas del eje y
  )
Discharge age and calendar year
Global survival

Global survival

Code
modSR5_SC_sex <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ sex_rec, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
sn_5_sex<- summary(modSR5_SC_sex, times = c(5)*365.241)
sn_10_sex<- summary(modSR5_SC_sex, times = c(10)*365.241)


modSR5_SC_agegr <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ agegroup, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
sn_5_age<- summary(modSR5_SC_agegr, times = c(5)*365.241)
sn_10_age<- summary(modSR5_SC_agegr, times = c(10)*365.241)


modSR5_SC_res <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ res_plan, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
sn_5_res<- summary(modSR5_SC_res, times = c(5)*365.241)
sn_10_res<- summary(modSR5_SC_res, times = c(10)*365.241)


modSR5_SC_trcomp <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ tr_compliance_status, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
sn_5_trcomp<- summary(modSR5_SC_trcomp, times = c(5)*365.241)
sn_10_trcomp<- summary(modSR5_SC_trcomp, times = c(10)*365.241)


modSR5_SC_prim_licit <- relsurv::rs.surv(Surv(follow_up_days, status==1)~ prim_sub_licit, data=clean_df_corr_surv_km, 
ratetable = relsurv::transrate.hmd(paste0(getwd(), "/_input/mltper_1x1.txt"), 
                                   paste0(getwd(), "/_input/fltper_1x1.txt")),
method ="pohar-perme", rmap = list(age   = disch_age *365.241  , sex   = sex_rec  ,  year = disch_yr))
sn_5_licit<- summary(modSR5_SC_prim_licit, times = c(5)*365.241)
sn_10_licit<- summary(modSR5_SC_prim_licit, times = c(10)*365.241)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
rbind.data.frame(
tibble(level = "Total", survival = sprintf("%.2f (%.2f–%.2f)",sn_5_total$surv, sn_5_total$lower, sn_5_total$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_sex$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_sex$surv, sn_5_sex$lower, sn_5_sex$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_age$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_age$surv, sn_5_age$lower, sn_5_age$upper)),
tibble(level = c("Ambulatory", "Residential"), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_res$surv, sn_5_res$lower, sn_5_res$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_licit$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_licit$surv, sn_5_licit$lower, sn_5_licit$upper)),
tibble(level = sub(".*=", "", rownames(sn_5_trcomp$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_5_trcomp$surv, sn_5_trcomp$lower, sn_5_trcomp$upper))
)|>
(\(df) { 
  rownames(df) <- NULL
  df->> netsurv_5_main
  df
})() |> 
  knitr::kable("markdown", caption= "Net survival at 5 years of follow-up")

rbind.data.frame(
tibble(level = "Total", survival = sprintf("%.2f (%.2f–%.2f)",sn_10_total$surv, sn_10_total$lower, sn_10_total$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_sex$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_sex$surv, sn_10_sex$lower, sn_10_sex$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_age$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_age$surv, sn_10_age$lower, sn_10_age$upper)),
tibble(level = c("Ambulatory", "Residential"), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_res$surv, sn_10_res$lower, sn_10_res$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_licit$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_licit$surv, sn_10_licit$lower, sn_10_licit$upper)),
tibble(level = sub(".*=", "", rownames(sn_10_trcomp$table)), survival = sprintf("%.2f (%.2f–%.2f)",sn_10_trcomp$surv, sn_10_trcomp$lower, sn_10_trcomp$upper))
)|>
(\(df) { 
  rownames(df) <- NULL
  df->> netsurv_10_main
  df
})() |> 
  knitr::kable("markdown", caption= "Net survival at 10 years of follow-up")
Net survival at 5 years of follow-up
level survival
Total 0.97 (0.97–0.98)
Male 0.97 (0.97–0.98)
Female 0.97 (0.97–0.98)
18-29 0.99 (0.99–0.99)
30-44 0.98 (0.98–0.98)
45-59 0.94 (0.93–0.94)
60+ 0.93 (0.91–0.96)
Ambulatory 0.98 (0.97–0.98)
Residential 0.97 (0.96–0.97)
illicit 0.99 (0.99–0.99)
licit 0.95 (0.94–0.95)
Completed 0.98 (0.98–0.98)
Not completed 0.97 (0.97–0.97)
Net survival at 10 years of follow-up
level survival
Total 0.95 (0.95–0.96)
Male 0.95 (0.95–0.96)
Female 0.95 (0.94–0.96)
18-29 0.98 (0.98–0.98)
30-44 0.95 (0.95–0.96)
45-59 0.89 (0.87–0.91)
60+ 0.83 (0.69–1.00)
Ambulatory 0.96 (0.95–0.96)
Residential 0.93 (0.92–0.94)
illicit 0.97 (0.97–0.98)
licit 0.90 (0.88–0.91)
Completed 0.96 (0.95–0.97)
Not completed 0.95 (0.94–0.95)

Global survival

Approximate z-test for any pair of strata and control the family-wise error rate with Holm correction.

Code
df_netsurv_5_main<- 
  rbind.data.frame(
cbind.data.frame(level = "Total", survival = sn_5_total$surv, lower= sn_5_total$lower, upper= sn_5_total$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_sex$table)), survival = sn_5_sex$surv, lower= sn_5_sex$lower, upper= sn_5_sex$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_age$table)), survival = sn_5_age$surv, lower= sn_5_age$lower, upper= sn_5_age$upper),
cbind.data.frame(level = c("Ambulatory", "Residential"), survival = sn_5_res$surv, lower= sn_5_res$lower, upper=sn_5_res$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_licit$table)), survival = sn_5_licit$surv, lower= sn_5_licit$lower, upper= sn_5_licit$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_5_trcomp$table)), survival = sn_5_trcomp$surv, lower= sn_5_trcomp$lower, upper= sn_5_trcomp$upper)
)
df_netsurv_10_main<- 
  rbind.data.frame(
cbind.data.frame(level = "Total", survival = sn_10_total$surv, lower= sn_10_total$lower, upper= sn_10_total$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_sex$table)), survival = sn_10_sex$surv, lower= sn_10_sex$lower, upper= sn_10_sex$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_age$table)), survival = sn_10_age$surv, lower= sn_10_age$lower, upper= sn_10_age$upper),
cbind.data.frame(level = c("Ambulatory", "Residential"), survival = sn_10_res$surv, lower= sn_10_res$lower, upper=sn_10_res$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_licit$table)), survival = sn_10_licit$surv, lower= sn_10_licit$lower, upper= sn_10_licit$upper),
cbind.data.frame(level = sub(".*=", "", rownames(sn_10_trcomp$table)), survival = sn_10_trcomp$surv, lower= sn_10_trcomp$lower, upper= sn_10_trcomp$upper)
) 

# log(-log) transformation
df_netsurv_5_main$eta        <- log(-log(df_netsurv_5_main$survival))
df_netsurv_5_main$eta_lower  <- log(-log(df_netsurv_5_main$lower))
df_netsurv_5_main$eta_upper  <- log(-log(df_netsurv_5_main$upper))

# standard error on the transformed scale
df_netsurv_5_main$se <- (df_netsurv_5_main$eta_upper - df_netsurv_5_main$eta_lower) / (2*1.96)

# helper to compare two rows
cmp <- function(g1, g2) {
  i <- match(g1, df_netsurv_5_main$level);  j <- match(g2, df_netsurv_5_main$level)
  z  <- (df_netsurv_5_main$eta[i] - df_netsurv_5_main$eta[j]) / sqrt(df_netsurv_5_main$se[i]^2 + df_netsurv_5_main$se[j]^2)
  p  <- 2*pnorm(-abs(z))
  c(z = z, p = p)
}

pairs <- rbind(
  c("licit",        "illicit"),
  c("Completed",    "Not completed"),
  c("Female",       "Male"),
  c("Ambulatory",   "Residential")
)

out <- t(apply(pairs, 1, function(x) cmp(x[1], x[2])))
colnames(out) <- c("z", "raw_p")
out <- cbind.data.frame(pairs, out, holm_p = p.adjust(out[,"raw_p"], method = "holm"))


out |> 
  knitr::kable("markdown", caption= "Comparison by strata at 10 years of follow-up", col.names=c("Var1", "Var2", "z", "p value", "Holm-corrected"))

#La varianza de una probabilidad de supervivencia estimada (usando la fórmula de Greenwood, por ejemplo) depende de la propia probabilidad de supervivencia. Esto viola el supuesto de varianza constante (homocedasticidad) de muchas pruebas estadísticas.
#La transformación log-log complementaria (cloglog) está diseñada específicamente para estabilizar esta varianza. Hace que la varianza de la probabilidad transformada sea mucho menos dependiente de su valor. Esto hace que la prueba z sea más válida y fiable.


# log(-log) transformation
df_netsurv_10_main$eta        <- log(-log(df_netsurv_10_main$survival))
df_netsurv_10_main$eta_lower  <- log(-log(df_netsurv_10_main$lower))
df_netsurv_10_main$eta_upper  <- log(-log(df_netsurv_10_main$upper))

# standard error on the transformed scale
df_netsurv_10_main$se <- (df_netsurv_10_main$eta_upper - df_netsurv_10_main$eta_lower) / (2*1.96)

# helper to compare two rows
cmp2 <- function(g1, g2) {
  i <- match(g1, df_netsurv_10_main$level);  j <- match(g2, df_netsurv_10_main$level)
  z  <- (df_netsurv_10_main$eta[i] - df_netsurv_10_main$eta[j]) / sqrt(df_netsurv_10_main$se[i]^2 + df_netsurv_10_main$se[j]^2)
  p  <- 2*pnorm(-abs(z))
  c(z = z, p = p)
}

pairs <- rbind(
  c("licit",        "illicit"),
  c("Completed",    "Not completed"),
  c("Female",       "Male"),
  c("Ambulatory",   "Residential"),
  c("18-29", "30-44"),
  c("18-29", "45-59"),
  c("18-29", "60+"),
  c("30-44", "45-59"),
  c("30-44", "60+"),
  c("45-59", "60+")
)

out2 <- t(apply(pairs, 1, function(x) cmp2(x[1], x[2])))
colnames(out2) <- c("z", "raw_p")
out2 <- cbind.data.frame(pairs, out2, holm_p = p.adjust(out2[,"raw_p"], method = "holm"))
rownames(out2)<-NULL
if(!is_null(out2$holm_p.1)){
  out2$holm_p.1 <- NULL
}
out2<-if(ncol(out2)>5){out2[,3:8]}else{out2}

out2|>
  mutate(holm_p= case_when(holm_p<.001~"<.001",T~sprintf("%.3f",holm_p)))|> 
  knitr::kable("markdown", caption= "Comparison by strata at 10 years of follow-up", col.names=c("Var1", "Var2", "z", "p value", "Holm-corrected"))
Comparison by strata at 10 years of follow-up
Var1 Var2 z p value Holm-corrected
licit illicit 18.976749 0.0000000 0.0000000
Completed Not completed -3.749014 0.0001775 0.0003551
Female Male 0.660924 0.5086611 0.5086611
Ambulatory Residential -3.893182 0.0000989 0.0002968
Comparison by strata at 10 years of follow-up
Var1 Var2 z p value Holm-corrected
licit illicit 13.4513975 0.0000000 <.001
Completed Not completed -2.3341449 0.0195881 0.098
Female Male 0.8305222 0.4062436 1.000
Ambulatory Residential -4.9034899 0.0000009 <.001
18-29 30-44 -6.9018306 0.0000000 <.001
18-29 45-59 -12.0674056 0.0000000 <.001
18-29 60+ -1.5248118 0.1273060 0.509
30-44 45-59 -7.2357760 0.0000000 <.001
30-44 60+ -0.9079985 0.3638790 1.000
45-59 60+ -0.3325376 0.7394834 1.000

Flowchart

Code
library(DiagrammeR)
gr<-
  grViz("
    digraph flowchart {
      graph [layout = dot, rankdir = TB]
    
      # General node styling
      node [fontname = Times, shape = rectangle, fontsize = 14, style = filled, fillcolor = transparent]
    
      # Main flow nodes
      original [label = 'Original Database\\n(n = 150,046;\\nPatients = 106,283)', fillcolor = lightgray]
      c1_dataset [label = 'Database\\n(n = 146,012;\\nPatients = 103,612)']
      after_discard [label = 'Database\\n(n = 88,774;\\nPatients = 88,774)']
      after_discard2 [label = 'Database\\n(n = 74,470;\\nPatients = 74,470)']
      final_dataset [label = 'Final Database\\n(n = 70,064;\\nPatients = 70,064)', fillcolor = lightgray]
      
      # Discard nodes (aligned between main flow steps)
      discard_referrals [label = '&#8226;Duplicates in admission age and hash key\\l  and validating days in treatment (n= 54);\\l  Records with unavailable missing days in\\l  treatment (eg., currently in treatment): 4,007\\l&#8226;Records with negative days in treatment: 8;\\l>3 yrs. in treatment: 1,039)\\l']
      discard_duplicates [label = '&#8226;Restricting treatments of patients admitted\\l  between 2010-2019; having 18-64 years at\\l  admission: 57,240\\l']
      discard_single [label = '&#8226;Discarded (death, no tr. compliance): 14,304 \\l']
      discard_single2 [label = '&#8226;Discarded missing values in sex, discharge and \\ldeath dates and negative follow-up periods: 4,406\\l']

    
      # Invisible vertices for middle line
      v1 [shape = point, width = 0, style = invis]
      v2 [shape = point, width = 0, style = invis]
      v3 [shape = point, width = 0, style = invis]
      v4 [shape = point, width = 0, style = invis]
    
      # Main flow edges (vertical line)
      original -> v1 [arrowhead = none]
      v1 -> c1_dataset
      c1_dataset -> v2 [arrowhead = none]
      v2 -> after_discard
      after_discard ->v3  [arrowhead = none]
      v3 -> after_discard2
      after_discard2 -> v4 [arrowhead = none]
      v4 -> final_dataset
    
      # Discard connections (from the middle line)
      v1 -> discard_referrals
      v2 -> discard_duplicates
      v3 -> discard_single
      v4 -> discard_single2
    
      # Alignment
      { rank = same; discard_referrals; v1 }
      { rank = same; discard_duplicates; v2 }
      { rank = same; discard_single; v3 }
      { rank = same; discard_single2; v4 }
    }
  ", 
  width = 1000,
  height = 1400)
gr
Code
unlink(paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart_files"), recursive = TRUE)
htmlwidgets::saveWidget(gr, paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.html"))
webshot::webshot(paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.html"), 
                 paste0(gsub("/cons","",getwd()),"/cons/_figs/_mortality_flowchart.png"),
                 vwidth = 300, vheight = 300*1.2,  zoom=10, expand=100)  # Prueba con diferentes coordenadas top, left, width, and height

Registered S3 methods overwritten by ‘callr’: method from format.callr_status_error
print.callr_status_error

Mortality causes

Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#V01–Y98
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

table(!is.na(mortality$diag2))
#1229256  102597 
prop.table(table(!is.na(mortality$diag2)))*100
#92.296672  7.703328 

pat_ext <- "^[VWXY](0[1-9]|[1-8][0-9]|9[0-8])"

mortality <- mortality %>%                         # 2) clasifica
  mutate(
    ext_d1  = !is.na(diag1) & str_detect(diag1, pat_ext),
    ext_d2  = !is.na(diag2) & str_detect(diag2, pat_ext),
    ext_any = ext_d1 | ext_d2,
    miss_d1 = is.na(diag1),
    miss_d2 = is.na(diag2)
  )

# 3) Resumen rápido
mortality %>% summarise(
  n            = n(),
  n_ext_d1     = sum(ext_d1),
  n_ext_d2     = sum(ext_d2),
  n_ext_any    = sum(ext_any),
  n_miss_d1    = sum(miss_d1),
  n_miss_d2    = sum(miss_d2),
  prop_ext_any = mean(ext_any)
)
#         n n_ext_d1 n_ext_d2 n_ext_any n_miss_d1 n_miss_d2 prop_ext_any
#     <int>    <int>    <int>     <int>     <int>     <int>        <dbl>
# 1 1331853        0    96553     96553         0   1229256       0.0725
invisible("Every external cause is in DIAG2")


niveles_icd10 <- c(
  "A00–B99  Infecciosas/parasitarias",
  "C00–D48  Neoplasias",
  "D50–D89  Sangre/inmunidad",
  "E00–E90  Endocrinas/metabólicas",
  "F00–F99  Mentales y del comportamiento",
  "G00–G99  Sistema nervioso",
  "H00–H59  Ojo y anexos",
  "H60–H95  Oído y apófisis mastoides",
  "I00–I99  Circulatorio",
  "J00–J99  Respiratorio",
  "K00–K93  Digestivo",
  "L00–L99  Piel y tejido subcutáneo",
  "M00–M99  Músculo-esquelético y tejido conjuntivo",
  "N00–N99  Genitourinario",
  "O00–O99  Embarazo, parto y puerperio",
  "P00–P96  Perinatal",
  "Q00–Q99  Malformaciones congénitas",
  "R00–R99  Síntomas y signos mal definidos",
  "S00–T88  Lesiones, envenenamientos y otras consecuencias externas",
  "V01–Y98  Causas externas de morbilidad y mortalidad",
  "U00–U85  Códigos de uso especial",
  "Z00–Z99  Factores que influyen en el estado de salud"
)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
mortality <- 
mortality |> 
  dplyr::mutate(mort= ifelse(hashkey %in% clean_df$hash_key,1,0)) |> #slice(-1) |>   
  dplyr::mutate(od = grepl("T400|T401|T403|T404|T406", diag2, ignore.case = F)) |> #Keen, C., Kinner, S. A., Young, J. T., Jang, K., Gan, W., Samji, H., Zhao, B., Krausz, M., & Slaunwhite, A. (2022). Prevalence of co-occurring mental illness and substance use disorder and association with overdose: A linked data cohort study among residents of British Columbia, Canada. Addiction, 117(1), 129-140. https://doi.org/10.1111/add.15580
#Vital statistics   ICD-10 code of T40.0, T40.1, T40.3, T40.4, T40.6
  #Defunciones de jóvenes (15-29 años) por causas externas de mortalidad en Chile,2017.
#https://www.conaset.cl/wp-content/uploads/2020/04/Causas_muerte_j%C3%B3venesDEIS2017.pdf
#https://apps.abacus.ai/chatllm/?appId=df4dbd254&convoId=8f2cf714
  dplyr::mutate(
    category = case_when(
      # Suicides (X60–X84)
      grepl("^X(6[0-9]|7[0-9]|8[0-4])", diag2) ~ "Intentional self-harm",
      # Transport accidents (V01–V99)
      grepl("^V[0-9]{2}", diag2) ~ "Transport accidents",
      # Other unintentional external causes (W00–X59)
        grepl("^(W[0-9]{2}|X[0-5][0-9])", diag2) ~ "Other unintentional external causes of injury",
      # Assaults (X85–Y09)
      grepl("^(X(8[5-9]|9[0-9])|Y0[0-9])", diag2) ~ "Assaults",
      # Sequelae of external causes (Y85–Y89)
      grepl("^Y8[5-9]", diag2) ~ "Other external causes", #Only 6 cases. Should be in other
      # Complications of medical and surgical care (Y40–Y84)
      grepl("^Y([4-7][0-9]|8[0-4])", diag2) ~ "Other external causes", #only 1 case. should be in other
      nchar(diag2)<2| is.na(diag2) ~ "No external causes",
      # All other codes
      TRUE ~ "Other external causes"
    )
  )|>
mutate(chapter = case_when(
           str_detect(diag1, "^R[0-9]{2}")                       ~ "Symptoms & signs",
           str_detect(diag1, "^A|^B")                            ~ "Infectious & parasitic",
           str_detect(diag1, "^I")                               ~ "Circulatory",
           str_detect(diag1, "^J")                               ~ "Respiratory",
           str_detect(diag1, "^K")                               ~ "Digestive",
           str_detect(diag1, "^E")                               ~ "Endocrine & metabolic",
           str_detect(diag1, "^C")                               ~ "Malignant neoplasms",
           str_detect(diag1, "^F")                               ~ "Mental and behavioural",
           str_detect(diag1, "^G")                               ~ "Nervous system",
           str_detect(diag1, "^(D|H|L|M|N|O|P|Q)")           ~ "Other causes",
           TRUE                                                     ~ NA_character_        # if code malformed
         ), chapter= factor(chapter), category= factor(category)) |> 
  mutate(
    chapter2 = case_when(
      str_detect(diag1, "^[AB]")                       ~ niveles_icd10[1],
      str_detect(diag1, "^C|^D0|^D1|^D2|^D3|^D4")      ~ niveles_icd10[2],
      str_detect(diag1, "^D[5-9]")                     ~ niveles_icd10[3],
      str_detect(diag1, "^E")                          ~ niveles_icd10[4],
      str_detect(diag1, "^F")                          ~ niveles_icd10[5],
      str_detect(diag1, "^G")                          ~ niveles_icd10[6],
      str_detect(diag1, "^H0|^H1|^H2|^H3|^H4|^H5")     ~ niveles_icd10[7],
      str_detect(diag1, "^H6|^H7|^H8|^H9")             ~ niveles_icd10[8],
      str_detect(diag1, "^I")                          ~ niveles_icd10[9],
      str_detect(diag1, "^J")                          ~ niveles_icd10[10],
      str_detect(diag1, "^K")                          ~ niveles_icd10[11],
      str_detect(diag1, "^L")                          ~ niveles_icd10[12],
      str_detect(diag1, "^M")                          ~ niveles_icd10[13],
      str_detect(diag1, "^N")                          ~ niveles_icd10[14],
      str_detect(diag1, "^O")                          ~ niveles_icd10[15],
      str_detect(diag1, "^P")                          ~ niveles_icd10[16],
      str_detect(diag1, "^Q")                          ~ niveles_icd10[17],
      str_detect(diag1, "^R")                          ~ niveles_icd10[18],
      str_detect(diag1, "^[ST]")                       ~ niveles_icd10[19],
      str_detect(diag1, "^[VWX]")                      ~ niveles_icd10[20],
      str_detect(diag1, "^U")                          ~ niveles_icd10[21],
      str_detect(diag1, "^Z")                          ~ niveles_icd10[22],
      TRUE                                             ~ NA_character_       # códigos mal formados
    ),
    chapter2 = factor(chapter2, levels = niveles_icd10)
  )

mortality <- 
mortality |> 
mutate(edad_cant_cat = dplyr::case_when(
  edad_cant >= 18 & edad_cant < 30 ~ "18-29",
  edad_cant >= 30 & edad_cant < 45 ~ "30-44",
  edad_cant >= 45 & edad_cant < 60 ~ "45-59",
  edad_cant >= 60 & edad_cant < 86 ~ "60+",
  TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
),
sex_rec = dplyr::case_when(                 # optional renaming step
  sexo == 1   ~ "Male",
  sexo == 2 ~ "Female",
  TRUE~ NA_character_),
sex_rec= factor(sex_rec, levels = c("Male", "Female"))
) |> 
  filter(!is.na(sex_rec), !is.na(edad_cant_cat))


clean_df |> 
left_join(mortality[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")) |> nrow()
#7065
invisible("+1 row added")
#70064
#

clean_df |> 
  left_join(mortality[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")) |> group_by(hash_key) |> 
  count() |> 
  filter(n>1)

cat("Exclude the duplicated mortality")
mortality_deduplicated <- mortality |> 
  arrange(hashkey, ano_def, mes_def, dia_def) |>
  group_by(hashkey) |>
  slice_head(n = 1) |>
  ungroup()


#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_18<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="18-29")$hash_key) |> 
  janitor::tabyl(chapter, show_na = T)
mort_chap_30<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="30-44")$hash_key) |> 
  janitor::tabyl(chapter, show_na = T)
mort_chap_45<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="45-59")$hash_key) |> 
  janitor::tabyl(chapter, show_na = T)
mort_chap_60<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="60+")$hash_key) |>  
  janitor::tabyl(chapter, show_na = T)

mort_cat_18<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="18-29")$hash_key) |>  
  janitor::tabyl(category)
mort_cat_30<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="30-44")$hash_key) |>  
  janitor::tabyl(category, show_na = T)
mort_cat_45<- 
mortality|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="45-59")$hash_key) |>  
  janitor::tabyl(category)
mort_cat_60<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, disch_age_cat=="60+")$hash_key) |>  
  janitor::tabyl(category, show_na = T)


mort_chap_18 |> 
  full_join(mort_chap_30, by="chapter") |> 
  full_join(mort_chap_45, by="chapter") |> 
  full_join(mort_chap_60, by="chapter") |> 
  filter(!is.na(chapter)) |> 
  dplyr::select(chapter, dplyr::contains("valid")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
mort_cat_18 |> 
    full_join(mort_cat_30, by="category") |> 
    full_join(mort_cat_45, by="category") |> 
    full_join(mort_cat_60, by="category") |>   
    filter(!is.na(category)) |> 
    dplyr::select(category, dplyr::contains("percent")) |> 
    mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
    rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_male<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, sex_rec=="Male")$hash_key) |>  
  janitor::tabyl(chapter, show_na = T)
mort_chap_female<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, sex_rec=="Female")$hash_key) |>  
  janitor::tabyl(chapter, show_na = T)
mort_cat_male<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, sex_rec=="Male")$hash_key) |>  
  janitor::tabyl(category)
mort_cat_female<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, sex_rec=="Female")$hash_key) |>  
  janitor::tabyl(category, show_na = T)


mort_chap_male |> 
  full_join(mort_chap_female, by="chapter") |> 
  filter(!is.na(chapter)) |> 
  dplyr::select(chapter, dplyr::contains("valid")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
mort_cat_male |> 
    full_join(mort_cat_female, by="category") |> 
    filter(!is.na(category)) |> 
    dplyr::select(category, dplyr::contains("percent")) |> 
    mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
    rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_amb<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, res_plan==0)$hash_key) |>  
  janitor::tabyl(chapter)
mort_chap_res<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, res_plan==1)$hash_key) |>  
  janitor::tabyl(chapter)
mort_cat_amb<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, res_plan==0)$hash_key) |>  
  janitor::tabyl(category)
mort_cat_res<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, res_plan==1)$hash_key) |>  
  janitor::tabyl(category)


mort_chap_amb |> 
  full_join(mort_chap_res, by="chapter") |> 
  filter(!is.na(chapter)) |> 
  dplyr::select(chapter, dplyr::contains("valid")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
mort_cat_amb |> 
    full_join(mort_cat_res, by="category") |> 
    filter(!is.na(category)) |> 
    dplyr::select(category, dplyr::contains("percent")) |> 
    mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
    rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_licit<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, prim_sub_licit=="licit")$hash_key) |>  
  janitor::tabyl(chapter)
mort_chap_illicit<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, prim_sub_licit=="illicit")$hash_key) |>  
  janitor::tabyl(chapter)
mort_cat_licit<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, prim_sub_licit=="licit")$hash_key) |>  
  janitor::tabyl(category)
mort_cat_illicit<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, prim_sub_licit=="illicit")$hash_key) |>  
  janitor::tabyl(category)


mort_chap_licit |> 
  full_join(mort_chap_illicit, by="chapter") |> 
  filter(!is.na(chapter)) |> 
  dplyr::select(chapter, dplyr::contains("valid")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
mort_cat_licit |> 
    full_join(mort_cat_illicit, by="category") |> 
    filter(!is.na(category)) |> 
    dplyr::select(category, dplyr::contains("percent")) |> 
    mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
    rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mort_chap_comp<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, !grepl("Not", tr_compliance_status))$hash_key) |>  
  janitor::tabyl(chapter)
mort_chap_not_comp<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, grepl("Not", tr_compliance_status))$hash_key) |>  
  janitor::tabyl(chapter)
mort_cat_comp<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, !grepl("Not", tr_compliance_status))$hash_key) |>  
  janitor::tabyl(category)
mort_cat_not_comp<- 
mortality_deduplicated|> 
   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% subset(clean_df, grepl("Not", tr_compliance_status))$hash_key) |>  
  janitor::tabyl(category)

mort_chap_comp |> 
  full_join(mort_chap_not_comp, by="chapter") |> 
  filter(!is.na(chapter)) |> 
  dplyr::select(chapter, dplyr::contains("valid")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
mort_cat_comp |> 
  full_join(mort_cat_not_comp, by="category") |> 
  filter(!is.na(category)) |> 
  dplyr::select(category, dplyr::contains("percent")) |> 
  mutate_if(is.numeric, ~ gsub("%","",scales::percent(.,accuracy=.1))) |> 
  rio::export("clipboard")

Error in error_interactive(): To run write_clip() in non-interactive mode, either call write_clip() with allow_non_interactive = TRUE, or set the environment variable CLIPR_ALLOW=TRUE

Code
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
chapters_table <- mort_chap_18 %>% 
  dplyr::select(chapter, n, valid_percent) %>%
  dplyr::rename(`18-29_n` = n, `18-29` = valid_percent) %>%
  full_join(mort_chap_30 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`30-44_n` = n, `30-44` = valid_percent), by = "chapter") %>%
  full_join(mort_chap_45 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`45-59_n` = n, `45-59` = valid_percent), by = "chapter") %>%
  full_join(mort_chap_60 %>% dplyr::select(chapter, n, valid_percent) %>% rename(`60+_n` = n, `60+` = valid_percent), by = "chapter") %>%
  full_join(mort_chap_male %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Male_n` = n, Male = valid_percent), by = "chapter") %>%
  full_join(mort_chap_female %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Female_n` = n, Female = valid_percent), by = "chapter") %>%
  full_join(mort_chap_amb %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Ambulatory_n` = n, Ambulatory = valid_percent), by = "chapter") %>%
  full_join(mort_chap_res %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Residential_n` = n, Residential = valid_percent), by = "chapter") %>%
  full_join(mort_chap_licit %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Licit_n` = n, Licit = valid_percent), by = "chapter") %>%
  full_join(mort_chap_illicit %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Illicit_n` = n, Illicit = valid_percent), by = "chapter") %>%
  full_join(mort_chap_comp %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Completed_n` = n, Completed = valid_percent), by = "chapter") %>%
  full_join(mort_chap_not_comp %>% dplyr::select(chapter, n, valid_percent) %>% rename(`Not_Completed_n` = n, `Not Completed` = valid_percent), by = "chapter") %>%
  filter(!is.na(chapter)) %>%
  mutate(
    `18-29` = sprintf("%d (%.1f)", `18-29_n`, `18-29` * 100),
    `30-44` = sprintf("%d (%.1f)", `30-44_n`, `30-44` * 100),
    `45-59` = sprintf("%d (%.1f)", `45-59_n`, `45-59` * 100),
    `60+` = sprintf("%d (%.1f)", `60+_n`, `60+` * 100),
    Male = sprintf("%d (%.1f)", `Male_n`, Male * 100),
    Female = sprintf("%d (%.1f)", `Female_n`, Female * 100),
    Ambulatory = sprintf("%d (%.1f)", `Ambulatory_n`, Ambulatory * 100),
    Residential = sprintf("%d (%.1f)", `Residential_n`, Residential * 100),
    Licit = sprintf("%d (%.1f)", `Licit_n`, Licit * 100),
    Illicit = sprintf("%d (%.1f)", `Illicit_n`, Illicit * 100),
    Completed = sprintf("%d (%.1f)", `Completed_n`, Completed * 100),
    `Not Completed` = sprintf("%d (%.1f)", `Not_Completed_n`, `Not Completed` * 100)
  ) %>%
  dplyr::select(chapter, `18-29`, `30-44`, `45-59`, `60+`, Male, Female, 
                Ambulatory, Residential, Licit, Illicit, Completed, `Not Completed`)

# Create External Causes table (using percent)
categories_table <- mort_cat_18 %>% 
  dplyr::select(category, n, percent) %>%
  dplyr::rename(`18-29_n` = n, `18-29` = percent) %>%
  full_join(mort_cat_30 %>% dplyr::select(category, n, percent) %>% rename(`30-44_n` = n, `30-44` = percent), by = "category") %>%
  full_join(mort_cat_45 %>% dplyr::select(category, n, percent) %>% rename(`45-59_n` = n, `45-59` = percent), by = "category") %>%
  full_join(mort_cat_60 %>% dplyr::select(category, n, percent) %>% rename(`60+_n` = n, `60+` = percent), by = "category") %>%
  full_join(mort_cat_male %>% dplyr::select(category, n, percent) %>% rename(`Male_n` = n, Male = percent), by = "category") %>%
  full_join(mort_cat_female %>% dplyr::select(category, n, percent) %>% rename(`Female_n` = n, Female = percent), by = "category") %>%
  full_join(mort_cat_amb %>% dplyr::select(category, n, percent) %>% rename(`Ambulatory_n` = n, Ambulatory = percent), by = "category") %>%
  full_join(mort_cat_res %>% dplyr::select(category, n, percent) %>% rename(`Residential_n` = n, Residential = percent), by = "category") %>%
  full_join(mort_cat_licit %>% dplyr::select(category, n, percent) %>% rename(`Licit_n` = n, Licit = percent), by = "category") %>%
  full_join(mort_cat_illicit %>% dplyr::select(category, n, percent) %>% rename(`Illicit_n` = n, Illicit = percent), by = "category") %>%
  full_join(mort_cat_comp %>% dplyr::select(category, n, percent) %>% rename(`Completed_n` = n, Completed = percent), by = "category") %>%
  full_join(mort_cat_not_comp %>% dplyr::select(category, n, percent) %>% rename(`Not_Completed_n` = n, `Not Completed` = percent), by = "category") %>%
  filter(!is.na(category)) %>%
  mutate(
    `18-29` = sprintf("%d (%.1f)", `18-29_n`, `18-29` * 100),
    `30-44` = sprintf("%d (%.1f)", `30-44_n`, `30-44` * 100),
    `45-59` = sprintf("%d (%.1f)", `45-59_n`, `45-59` * 100),
    `60+` = sprintf("%d (%.1f)", `60+_n`, `60+` * 100),
    Male = sprintf("%d (%.1f)", `Male_n`, Male * 100),
    Female = sprintf("%d (%.1f)", `Female_n`, Female * 100),
    Ambulatory = sprintf("%d (%.1f)", `Ambulatory_n`, Ambulatory * 100),
    Residential = sprintf("%d (%.1f)", `Residential_n`, Residential * 100),
    Licit = sprintf("%d (%.1f)", `Licit_n`, Licit * 100),
    Illicit = sprintf("%d (%.1f)", `Illicit_n`, Illicit * 100),
    Completed = sprintf("%d (%.1f)", `Completed_n`, Completed * 100),
    `Not Completed` = sprintf("%d (%.1f)", `Not_Completed_n`, `Not Completed` * 100)
  ) %>%
  dplyr::select(category, `18-29`, `30-44`, `45-59`, `60+`, Male, Female, 
                Ambulatory, Residential, Licit, Illicit, Completed, `Not Completed`)

# Create Table 1: ICD-10 Chapters
table1 <- kable(chapters_table, 
                format = "html",
                col.names = c("ICD-10 Chapter", "18-29", "30-44", "45-59", "60+", 
                             "Male", "Female", "Ambulatory", "Residential", 
                             "Licit", "Illicit", "Completed", "Not Completed"),
                caption = "Table SXX. Distribution (%) of mortality according to ICD-10 chapters stratified by treatment completion, primary substance, treatment setting, sex, and age",
                escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE, font_size = 12) %>%
  add_header_above(c(" " = 1, "Age Groups" = 4, "Sex" = 2, "Treatment Setting" = 2, 
                     "Primary Substance" = 2, "Treatment Completion" = 2)) %>%
  column_spec(1, width = "12em", bold = TRUE) %>%
  column_spec(2:13, width = "6em") %>%
  row_spec(0, bold = TRUE, background = "#f0f0f0")

# Create Table 2: External Causes
table2 <- kable(categories_table, 
                format = "html",
                col.names = c("External Cause Category", "18-29", "30-44", "45-59", "60+", 
                             "Male", "Female", "Ambulatory", "Residential", 
                             "Licit", "Illicit", "Completed", "Not Completed"),
                caption = "Table SXX. Distribution (%) of external causes of mortality by treatment completion, primary substance, treatment setting, sex, and age",
                escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                full_width = FALSE, font_size = 12) %>%
  add_header_above(c(" " = 1, "Age Groups" = 4, "Sex" = 2, "Treatment Setting" = 2, 
                     "Primary Substance" = 2, "Treatment Completion" = 2)) %>%
  column_spec(1, width = "12em", bold = TRUE) %>%
  column_spec(2:13, width = "6em") %>%
  row_spec(0, bold = TRUE, background = "#f0f0f0")

# Print tables
table1
table2

  FALSE    TRUE 
1229256  102597 

    FALSE      TRUE 
92.296672  7.703328 
# A tibble: 1 × 7
        n n_ext_d1 n_ext_d2 n_ext_any n_miss_d1 n_miss_d2 prop_ext_any
    <int>    <int>    <int>     <int>     <int>     <int>        <dbl>
1 1331853        0    96553     96553         0   1229256       0.0725
[1] 70065
# A tibble: 1 × 2
# Groups:   hash_key [1]
  hash_key                                                             n
  <chr>                                                            <int>
1 2f9f74c4c0602d24c8de3e6bc473863c2bc9949ee8d31db6aad8323c6901083a     2
Exclude the duplicated mortality
Table SXX. Distribution (%) of mortality according to ICD-10 chapters stratified by treatment completion, primary substance, treatment setting, sex, and age
Age Groups
Sex
Treatment Setting
Primary Substance
Treatment Completion
ICD-10 Chapter 18-29 30-44 45-59 60+ Male Female Ambulatory Residential Licit Illicit Completed Not Completed
Circulatory 27 (17.9) 126 (18.9) 216 (22.5) 38 (23.2) 342 (22.4) 65 (15.8) 355 (21.9) 52 (16.5) 250 (18.9) 157 (25.6) 117 (24.6) 290 (19.8)
Digestive 26 (17.2) 237 (35.6) 380 (39.7) 61 (37.2) 562 (36.8) 142 (34.5) 599 (36.9) 105 (33.2) 603 (45.5) 101 (16.5) 137 (28.8) 567 (38.7)
Endocrine & metabolic 3 (2.0) 21 (3.2) 15 (1.6) 3 (1.8) 33 (2.2) 9 (2.2) 32 (2.0) 10 (3.2) 23 (1.7) 19 (3.1) 8 (1.7) 34 (2.3)
Infectious & parasitic 17 (11.3) 71 (10.7) 38 (4.0) 3 (1.8) 86 (5.6) 43 (10.5) 99 (6.1) 30 (9.5) 52 (3.9) 77 (12.6) 31 (6.5) 98 (6.7)
Malignant neoplasms 25 (16.6) 50 (7.5) 115 (12.0) 30 (18.3) 161 (10.5) 59 (14.4) 191 (11.8) 29 (9.2) 127 (9.6) 93 (15.2) 69 (14.5) 151 (10.3)
Mental and behavioural 3 (2.0) 18 (2.7) 25 (2.6) 3 (1.8) 45 (2.9) 4 (1.0) 40 (2.5) 9 (2.8) 38 (2.9) 11 (1.8) 9 (1.9) 40 (2.7)
Nervous system 7 (4.6) 19 (2.9) 19 (2.0) 2 (1.2) 39 (2.6) 8 (1.9) 38 (2.3) 9 (2.8) 25 (1.9) 22 (3.6) 9 (1.9) 38 (2.6)
Other causes 11 (7.3) 16 (2.4) 20 (2.1) 6 (3.7) 42 (2.7) 11 (2.7) 45 (2.8) 8 (2.5) 28 (2.1) 25 (4.1) 22 (4.6) 31 (2.1)
Respiratory 17 (11.3) 63 (9.5) 90 (9.4) 11 (6.7) 145 (9.5) 36 (8.8) 138 (8.5) 43 (13.6) 115 (8.7) 66 (10.8) 45 (9.5) 136 (9.3)
Symptoms & signs 15 (9.9) 45 (6.8) 40 (4.2) 7 (4.3) 73 (4.8) 34 (8.3) 86 (5.3) 21 (6.6) 65 (4.9) 42 (6.9) 28 (5.9) 79 (5.4)
Table SXX. Distribution (%) of external causes of mortality by treatment completion, primary substance, treatment setting, sex, and age
Age Groups
Sex
Treatment Setting
Primary Substance
Treatment Completion
External Cause Category 18-29 30-44 45-59 60+ Male Female Ambulatory Residential Licit Illicit Completed Not Completed
Assaults 55 (13.4) 54 (4.6) 11 (0.9) 0 (0.0) 103 (4.3) 17 (2.8) 90 (3.6) 30 (5.9) 20 (1.1) 100 (8.0) 9 (1.3) 111 (4.8)
Intentional self-harm 99 (24.1) 220 (18.6) 74 (6.1) 9 (4.7) 323 (13.5) 79 (13.2) 333 (13.4) 69 (13.6) 129 (7.4) 273 (21.8) 72 (10.7) 330 (14.2)
No external causes 154 (37.5) 678 (57.4) 982 (81.0) 168 (88.4) 1560 (65.1) 422 (70.3) 1660 (66.7) 322 (63.4) 1352 (77.6) 630 (50.2) 492 (73.3) 1490 (64.1)
Other external causes 1 (0.2) 3 (0.3) 3 (0.2) 0 (0.0) 6 (0.3) 1 (0.2) 6 (0.2) 1 (0.2) 2 (0.1) 5 (0.4) 1 (0.1) 6 (0.3)
Other unintentional external causes of injury 59 (14.4) 145 (12.3) 107 (8.8) 12 (6.3) 271 (11.3) 52 (8.7) 265 (10.7) 58 (11.4) 174 (10.0) 149 (11.9) 65 (9.7) 258 (11.1)
Transport accidents 43 (10.5) 82 (6.9) 36 (3.0) 1 (0.5) 133 (5.6) 29 (4.8) 134 (5.4) 28 (5.5) 65 (3.7) 97 (7.7) 32 (4.8) 130 (5.6)
Code
mx_1x1_comp<-
  rbind.data.frame(cbind.data.frame(sex="male", mltper_1x1), 
                   cbind.data.frame(sex="female", fltper_1x1))
mx_1x1_comp$Age<- as.numeric(mx_1x1_comp$Age)

Warning: NAs introducidos por coerción

Code
mx_1x1_comp_filt<-mx_1x1_comp[as.numeric(as.character(mx_1x1_comp$Year)) %in% years_followup,]
mx_1x1_comp_filt2<-mx_1x1_comp_filt[as.numeric(as.character(mx_1x1_comp_filt$Age)) %in% 
                                      min(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int):max(SISTRAT23_c1_2010_2022_df_prev1q_sel4a_surv$adm_age_rec2_int),]


order_levels <- c(
  "Infectious & parasitic",
  "Malignant neoplasms",
  "Endocrine & metabolic",
  "Mental and behavioral",
  "Nervous system",
  "Circulatory",
  "Respiratory",
  "Digestive",
  "Symptoms & signs",
  "Other underlying causes"
)

cat("Mortality SUD population, underlying\n")
mortality_deduplicated|> 
  dplyr::filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  dplyr::filter(hashkey %in% clean_df$hash_key) |>
 dplyr::mutate(
    chapter = recode(chapter,
      "Mental and behavioural" = "Mental and behavioral",
      "Other causes"           = "Other underlying causes"
    ),
    chapter = factor(chapter, levels = order_levels, ordered = TRUE)
  ) %>%
  dplyr::arrange(chapter) |> 
  janitor::tabyl(chapter, show_na = T) |> 
  mutate(percent= sprintf("%1.1f",100*percent)) |> 
  pull(percent)

sum(as.numeric(mortality_deduplicated|> 
                   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
                   filter(hashkey %in% clean_df$hash_key) |>   
                   janitor::tabyl(chapter, show_na = T) |> 
                   mutate(percent= sprintf("%1.3f",100*percent)) |> 
                   pull(percent))[-11])

cat("Mortality SUD population, external\n")
mortality_deduplicated|> 
  filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  filter(hashkey %in% clean_df$hash_key) |>   
  janitor::tabyl(category, show_na = T) |> 
  mutate(percent= sprintf("%1.1f",100*percent)) |> 
  pull(percent)

sum(as.numeric(mortality_deduplicated|> 
    filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
    filter(hashkey %in% clean_df$hash_key) |>   
    janitor::tabyl(category, show_na = T) |> 
    mutate(percent= sprintf("%1.3f",100*percent)) |> pull(percent))[-3])

cat("Mortality gral population, underlying\n")
mortality_deduplicated|> 
  dplyr::filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
  #dplyr::filter(hashkey %in% clean_df$hash_key) |>
 dplyr::mutate(
    chapter = recode(chapter,
      "Mental and behavioural" = "Mental and behavioral",
      "Other causes"           = "Other underlying causes"
    ),
    chapter = factor(chapter, levels = order_levels, ordered = TRUE)
  ) %>%
  dplyr::arrange(chapter) |> 
  janitor::tabyl(chapter, show_na = T) |> 
  mutate(percent= sprintf("%1.1f",100*percent)) |> 
  pull(percent)

sum(as.numeric(mortality_deduplicated|> 
                   filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
                   #filter(hashkey %in% clean_df$hash_key) |>   
                   janitor::tabyl(chapter, show_na = T) |> 
                   mutate(percent= sprintf("%1.3f",100*percent)) |> 
                   pull(percent))[-11])


cat("Mortality gral population, external\n")

mortality_deduplicated|> 
    filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
    #filter(hashkey %in% clean_df$hash_key) |>   
    janitor::tabyl(category, show_na = T) |> 
    mutate(percent= sprintf("%1.1f",100*percent)) 

sum(as.numeric(
mortality_deduplicated|> 
    filter(death_date>="2010-01-01", death_date<"2021-01-01")|> 
    #filter(hashkey %in% clean_df$hash_key) |>   
    janitor::tabyl(category, show_na = T) |> 
    mutate(percent= sprintf("%1.2f",100*percent)) |> pull(percent))[-3])

mortality_summary <- 
  mortality_deduplicated |> 
    filter(ano_def>=2010) |> 
    filter(!is.na(category)) |> 
    #filter(!grepl("Other causes",category)) |> 
    group_by(ano_def, sex_rec, edad_cant) |> 
    summarise(assaults=sum(category=="Assaults", na.rm=T),
              self_harm=sum(category=="Intentional self-harm", na.rm=T),
              other_causes=sum(category=="No external causes", na.rm=T), 
              other_ext_causes= sum(category=="Other external causes", na.rm=T), 
              other_ext_causes_unint_inj= sum(grepl("Other unintentional",category), na.rm=T), 
              transport_accidents= sum(grepl("Transport",category), na.rm=T)
              )

summarise() has grouped output by ‘ano_def’, ‘sex_rec’. You can override using the .groups argument.

Code
mx_1x1_comp_filt3 <- 
mx_1x1_comp_filt2|> 
  mutate(edad_cant_cat = dplyr::case_when(
    Age>= 18 & Age < 30 ~ "18-29",
    Age>= 30 & Age < 45 ~ "30-44",
    Age>= 45 & Age < 60 ~ "45-59",
    Age>= 60 & Age < 86 ~ "60+",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ),
  sex_rec = dplyr::case_when(                 # optional renaming step
    sex == "male"   ~ "Male",
    sex == "female" ~ "Female",
    TRUE~ NA_character_),
  sex_rec= factor(sex_rec, levels = c("Male", "Female"))
  )|> 
  left_join(mortality_summary, by= c("Year"="ano_def", "sex_rec"= "sex_rec", "Age"= "edad_cant"))|> 
  group_by(Year, sex_rec, edad_cant_cat)|> 
  summarise(
    sum_assaults = sum(assaults),
    sum_self_harm = sum(self_harm),
    sum_other_causes = sum(other_causes),
    sum_other_ext_causes = sum(other_ext_causes),
    sum_other_unint_ext_causes = sum(other_ext_causes_unint_inj),
    sum_transport_accidents = sum(transport_accidents),
    Lx_total = sum(Lx)  # Sumar los años-persona
  ) |> 
  mutate(haz_assaults= sum_assaults/Lx_total, 
         haz_self_harm= sum_self_harm/Lx_total, 
         haz_other_causes= sum_other_causes/Lx_total, 
         haz_other_ext_causes= sum_other_ext_causes/Lx_total,
         haz_other_unint_ext_causes= sum_other_unint_ext_causes/Lx_total,         
         haz_transport_accidents= sum_transport_accidents/Lx_total) 

summarise() has grouped output by ‘Year’, ‘sex_rec’. You can override using the .groups argument.

Code
mx_1x1_comp_filt3 <- 
  mx_1x1_comp_filt3|>
  mutate(agegroup = dplyr::case_when(
    grepl("18",edad_cant_cat)~ 18,
    grepl("30",edad_cant_cat)~ 30,
    grepl("45",edad_cant_cat)~ 45,
    grepl("60",edad_cant_cat)~ 60,
    TRUE ~ NA_real_  # Opcional: manejo de valores fuera de rango
  ))|> 
  rename("year"="Year", "sex"="sex_rec")

c_SISTRAT_c1_assaults <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                              status = category=="Assaults", 
                              birth = birth_date_rec, 
                              exit = death_date_rec, entry = disch_date_rec6,
                              #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                              breaks = list(per = seq(2010, 2021, by = 1), 
                                            #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                            age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                              aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_assaults_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_assaults, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_assaults', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)

c_SISTRAT_c1_self_harm <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                 status = category=="Intentional self-harm", 
                                 birth = birth_date_rec, 
                                 exit = death_date_rec, entry = disch_date_rec6,
                                 #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                 breaks = list(per = seq(2010, 2021, by = 1), 
                                               #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                               age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                 aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_self_harm_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_self_harm, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_self_harm', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)

c_SISTRAT_c1_transport <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                  status = category=="Transport accidents", 
                                  birth = birth_date_rec, 
                                  exit = death_date_rec, entry = disch_date_rec6,
                                  #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                  breaks = list(per = seq(2010, 2021, by = 1), 
                                                #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                  aggre = list(agegroup = age, year = per, sex= sex_rec))

sr_1_accidents_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_transport, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_transport_accidents', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)

c_SISTRAT_c1_other_ext <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                  status = category=="Other external causes", 
                                  birth = birth_date_rec, 
                                  exit = death_date_rec, entry = disch_date_rec6,
                                  #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                  breaks = list(per = seq(2010, 2021, by = 1), 
                                                #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                  aggre = list(agegroup = age, year = per, sex= sex_rec))

sr_1_other_ext_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_other_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_other_ext_causes', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)


c_SISTRAT_c1_other_unint_ext <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                  status = grepl("Other unintentional",category), 
                                  birth = birth_date_rec, 
                                  exit = death_date_rec, entry = disch_date_rec6,
                                  #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                  breaks = list(per = seq(2010, 2021, by = 1), 
                                                #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                  aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_unint_ext_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_other_unint_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_other_unint_ext_causes', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)

sr_1_other_ext_df <- 
popEpi::sir( coh.data = c_SISTRAT_c1_other_ext, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_other_ext_causes', 
             adjust = c('agegroup','year','sex'), 
             EAR=T)

c_SISTRAT_c1_other <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")),
                                  status = category=="No external causes",
                                  birth = birth_date_rec,
                                  exit = death_date_rec, entry = disch_date_rec6,
                                  #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                  breaks = list(per = seq(2010, 2021, by = 1),
                                                #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                  aggre = list(agegroup = age, year = per, sex= sex_rec))

sr_1_other_causes_df <-
popEpi::sir( coh.data = c_SISTRAT_c1_other, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
             ref.data = mx_1x1_comp_filt3, #this should be only for people with assaults
             ref.rate = 'haz_other_causes',
             adjust = c('agegroup','year','sex'),
             EAR=T)


sir_assaults_df<- 
  cbind.data.frame(
    total= "Assaults/ Aggressions  (X85–Y09)",
    observed= round(sr_1_assaults_df$observed,0),
    pyrs= round(sr_1_assaults_df$pyrs,0),
    CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_assaults_df$observed, sr_1_assaults_df$pyrs, phi= 1))))),
    expected= round(sr_1_assaults_df$expected,0),
    SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_assaults_df, phi= extract_phi(c_SISTRAT_c1_assaults))[ , 1:3])))),
    EAR= as.character(sprintf("%.2f",sr_1_assaults_df$EAR)), 
    phi=extract_phi(c_SISTRAT_c1_assaults))

Warning: glm.fit: fitted rates numerically 0 occurred

Warning: glm.fit: fitted rates numerically 0 occurred

Code
sir_self_harm_df<- 
  cbind.data.frame(
    total= "Intentional self-harm (X60–X84)",
    observed= round(sr_1_self_harm_df$observed,0),
    pyrs= round(sr_1_self_harm_df$pyrs,0),
    CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_self_harm_df$observed, sr_1_self_harm_df$pyrs, phi= 1))))),
    expected= round(sr_1_self_harm_df$expected,0),
    SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_self_harm_df, phi= extract_phi(c_SISTRAT_c1_self_harm))[ , 1:3])))),
    EAR= as.character(sprintf("%.2f",sr_1_self_harm_df$EAR)), 
    phi=extract_phi(c_SISTRAT_c1_self_harm))
sir_other_unint_ext_df<- 
  cbind.data.frame(
    total= "Other unintentional external causes of injury",
    observed= round(sr_1_other_unint_ext_df$observed,0),
    pyrs= round(sr_1_other_unint_ext_df$pyrs,0),
    CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_other_unint_ext_df$observed, sr_1_other_unint_ext_df$pyrs, phi= 1))))),
    expected= round(sr_1_other_unint_ext_df$expected,0),
    SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_other_unint_ext_df, phi= extract_phi(c_SISTRAT_c1_other_unint_ext))[ , 1:3])))),
    EAR= as.character(sprintf("%.2f",sr_1_other_unint_ext_df$EAR)), 
    phi=extract_phi(c_SISTRAT_c1_other_unint_ext))
sir_other_df<- 
  cbind.data.frame(
    total= "No external causes",
    observed= round(sr_1_other_causes_df$observed,0),
    pyrs= round(sr_1_other_causes_df$pyrs,0),
    CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_other_causes_df$observed, sr_1_other_causes_df$pyrs, phi= 1))))),
    expected= round(sr_1_other_causes_df$expected,0),
    SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_other_causes_df, phi= extract_phi(c_SISTRAT_c1_other))[ , 1:3])))),
    EAR= as.character(sprintf("%.2f",sr_1_other_causes_df$EAR)), 
    phi=extract_phi(c_SISTRAT_c1_other))
sir_transport_df<- 
  cbind.data.frame(
    total= "Transport accidents",
    observed= round(sr_1_accidents_df$observed,0),
    pyrs= round(sr_1_accidents_df$pyrs,0),
    CMR_1000= do.call(sprintf,c("%.1f (%.1f–%.1f)",as.list(unlist(cmr_ci_phi(sr_1_accidents_df$observed, sr_1_accidents_df$pyrs, phi= 1))))),
    expected= round(sr_1_accidents_df$expected,0),
    SMR= do.call(sprintf,c("%.6f (%.6f–%.6f)",as.list(unlist(sir_ci_phi_improved(sr_1_accidents_df, phi= extract_phi(c_SISTRAT_c1_transport))[ , 1:3])))),
    EAR= as.character(sprintf("%.2f",sr_1_accidents_df$EAR)), 
    phi=extract_phi(c_SISTRAT_c1_transport))

#c_SISTRAT_c1_assaults c_SISTRAT_c1_self_harm c_SISTRAT_c1_transport c_SISTRAT_c1_other_ext c_SISTRAT_c1_other
#sr_1_assaults_df sr_1_self_harm_df sr_1_accidents_df sr_1_other_unint_ext_df sr_1_other_causes_df

cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sir_assaults_df, sir_self_harm_df, sir_other_unint_ext_df, sir_other_df, sir_transport_df)|> 
  rename("Characteristic"="total")|>
  (\(df) {
    df->> df_smr_ind_ext
    df
  })()|> 
  extract(
    SMR,
    into   = c("est", "low", "high"),
    regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
    convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
  dplyr::select(-est, -low, -high)|>  
  mutate(obs_exp= paste0(observed, "/", expected)) |> 
  knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. External causes")
Mortality SUD population, underlying
 [1] "4.3"  "7.3"  "1.4"  "1.6"  "1.6"  "13.6" "6.0"  "23.5" "3.6"  "1.8" 
[11] "35.3"
[1] 64.72
Mortality SUD population, external
[1] "4.0"  "13.4" "66.2" "0.2"  "10.8" "5.4" 
[1] 33.845
Mortality gral population, underlying
 [1] "2.4"  "28.4" "4.9"  "1.3"  "3.1"  "25.1" "8.4"  "8.4"  "1.8"  "5.7" 
[11] "10.5"
[1] 89.503
Mortality gral population, external
                                      category      n percent
                                      Assaults   6851     0.8
                         Intentional self-harm  19326     2.3
                            No external causes 780325    91.3
                         Other external causes   1552     0.2
 Other unintentional external causes of injury  26382     3.1
                           Transport accidents  20707     2.4
[1] 8.75
Dispersion-corrected 95% confidence intervals
All-cause SMRs for patients who accessed SUD treatment by sex and age group. External causes
Characteristic observed pyrs CMR_1000 expected EAR phi SMR_dir obs_exp
Assaults/ Aggressions (X85–Y09) 120 353826 0.3 (0.3–0.4) 33 0.25 0.8608439 3.63 (3.07–4.28) 120/33
Intentional self-harm (X60–X84) 402 353826 1.1 (1.0–1.3) 82 0.90 1.0840996 4.91 (4.43–5.44) 402/82
Other unintentional external causes of injury 323 353826 0.9 (0.8–1.0) 82 0.68 1.9441330 3.95 (3.39–4.60) 323/82
No external causes 1982 353826 5.6 (5.4–5.9) 739 3.51 1.3468431 2.68 (2.55–2.82) 1982/739
Transport accidents 162 353826 0.5 (0.4–0.5) 78 0.24 0.9231048 2.09 (1.80–2.42) 162/78
Code
r2_adj_assaults <-
  rate(
    data    = c_SISTRAT_c1_assaults,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_assaults <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_assaults$rate.adj,           # primer vector (rate)
  r2_adj_assaults$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    #phi    = extract_phi_dir(c_SISTRAT_c1_assaults),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_self_harm <-
  rate(
    data    = c_SISTRAT_c1_self_harm,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_self_harm <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_self_harm$rate.adj,           # primer vector (rate)
  r2_adj_self_harm$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_self_harm),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_transport <-
  rate(
    data    = c_SISTRAT_c1_transport,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_transport <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_transport$rate.adj,           # primer vector (rate)
  r2_adj_transport$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_transport),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_other_unint_ext <-
  rate(
    data    = c_SISTRAT_c1_other_unint_ext,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_other_unint_ext <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_other_unint_ext$rate.adj,           # primer vector (rate)
  r2_adj_other_unint_ext$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_other_unint_ext),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_other <-
rate(
  data    = c_SISTRAT_c1_other,
  obs     = from0to1,
  pyrs    = pyrs,
  #print   = year,
  adjust  = c("year", "sex", "agegroup"),
  weights = weights_df #weights inglm should be applied in the offset
)

DSR_1k_other <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_other$rate.adj,           # primer vector (rate)
  r2_adj_other$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_other),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

tasa_ponderada <- mx_1x1_comp_filt3 |> 
  ungroup() |> 
  left_join(
    weights_df,
    by = c("year", "agegroup", "sex")) |> 
  summarise(
    haz_assaults_w = sum(haz_assaults * weights),
    haz_self_harm_w = sum(haz_self_harm * weights),
    haz_other_causes_w = sum(haz_other_causes * weights),
    haz_other_unint_ext_causes_w = sum(haz_other_unint_ext_causes * weights),
    haz_transport_accidents_w = sum(haz_transport_accidents * weights)
  ) *1e3
round(tasa_ponderada,2)


rbind.data.frame(
  cbind.data.frame(var="Assaults/ Aggressions", t(r2_adj_assaults[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_assaults),
  cbind.data.frame(var="Intentional self-harm", t(r2_adj_self_harm[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_self_harm),
  cbind.data.frame(var="Other unintentional external causes", t(r2_adj_other_unint_ext[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other_unint_ext),
  cbind.data.frame(var="No external causes", t(r2_adj_other[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other),
  cbind.data.frame(var="Transport accidents", t(r2_adj_transport[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_transport)
)|>
  (\(df) {
    df->> df_smr_dir_ext
    df
  })()|> 
  mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|> 
  mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
  dplyr::select(-any_of(2:7))|>
  extract(
    SMR_dir,
    into   = c("est", "low", "high"),
    regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
    convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>  
  dplyr::select(-est, -low, -high)|>  
  rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|> 
  knitr::kable("markdown", caption= "SMRs, direct method, last treatment followed")
  haz_assaults_w haz_self_harm_w haz_other_causes_w
1           0.06            0.17               2.71
  haz_other_unint_ext_causes_w haz_transport_accidents_w
1                         0.17                      0.17
SMRs, direct method, last treatment followed
var CMR DSR DSR (SEs robust to dispersion)
Assaults/ Aggressions 0.3 (0.3–0.4) 0.3 (0.2–0.4) 0.3 (0.2–0.4)
Intentional self-harm 1.1 (1.0–1.3) 1.0 (0.8–1.2) 1.0 (0.8–1.2)
Other unintentional external causes 0.9 (0.8–1.0) 3.3 (0.8–14.3) 3.3 (0.4–24.3)
No external causes 5.6 (5.4–5.9) 8.1 (7.1–9.3) 8.1 (7.1–9.3)
Transport accidents 0.5 (0.4–0.5) 0.4 (0.3–0.6) 0.4 (0.3–0.5)
Code
# Create mortality summary by disease category
mortality_summary2 <- 
  mortality_deduplicated |> 
  filter(ano_def>=2010) |> 
  filter(!is.na(category)) |> 
  #filter(!grepl("Other causes",category)) |> 
  group_by(ano_def, sex_rec, edad_cant) |> 
  summarise(circulatory=sum(chapter=="Circulatory", na.rm=T),
            digestive=sum(chapter=="Digestive", na.rm=T),
            endocrine_metabolic= sum(grepl("Endocrine", chapter), na.rm=T),
            infectious_parasitic= sum(grepl("parasitic", chapter), na.rm=T),
            malignant_neoplasms= sum(grepl("neoplasms", chapter), na.rm=T),
            mental= sum(grepl("Mental", chapter), na.rm=T),
            nervous= sum(grepl("Nervous", chapter), na.rm=T),
            other_causes= sum(grepl("Other", chapter), na.rm=T),
            respiratory= sum(grepl("Respiratory",chapter), na.rm=T),
            symptoms_signs= sum(grepl("Symptoms",chapter), na.rm=T)
  )

summarise() has grouped output by ‘ano_def’, ‘sex_rec’. You can override using the .groups argument.

Code
# Create hazard rates dataset with age categories and disease-specific hazards
mx_1x1_comp_filt3b <- 
  mx_1x1_comp_filt2|> 
  mutate(edad_cant_cat = dplyr::case_when(
    Age>= 18 & Age < 30 ~ "18-29",
    Age>= 30 & Age < 45 ~ "30-44",
    Age>= 45 & Age < 60 ~ "45-59",
    Age>= 60 & Age < 86 ~ "60+",
    TRUE ~ NA_character_  # Opcional: manejo de valores fuera de rango
  ),
  sex_rec = dplyr::case_when(                 # optional renaming step
    sex == "male"   ~ "Male",
    sex == "female" ~ "Female",
    TRUE~ NA_character_),
  sex_rec= factor(sex_rec, levels = c("Male", "Female"))
  )|> 
  left_join(mortality_summary2, by= c("Year"="ano_def", "sex_rec"= "sex_rec", "Age"= "edad_cant"))|> 
  group_by(Year, sex_rec, edad_cant_cat)|> 
  summarise(
    sum_circulatory = sum(circulatory, na.rm=T),
    sum_digestive = sum(digestive, na.rm=T),
    sum_endocrine_metabolic = sum(endocrine_metabolic, na.rm=T),
    sum_infectious_parasitic = sum(infectious_parasitic, na.rm=T),
    sum_malignant_neoplasms = sum(malignant_neoplasms, na.rm=T),
    sum_mental = sum(mental, na.rm=T),
    sum_nervous = sum(nervous, na.rm=T),
    sum_other_causes = sum(other_causes, na.rm=T),
    sum_respiratory = sum(respiratory, na.rm=T),
    sum_symptoms_signs = sum(symptoms_signs, na.rm=T),
    Lx_total = sum(Lx)  # Sumar los años-persona
  ) |> 
  mutate(haz_circulatory= sum_circulatory/Lx_total, 
         haz_digestive= sum_digestive/Lx_total, 
         haz_endocrine_metabolic= sum_endocrine_metabolic/Lx_total, 
         haz_infectious_parasitic= sum_infectious_parasitic/Lx_total,
         haz_malignant_neoplasms= sum_malignant_neoplasms/Lx_total,
         haz_mental= sum_mental/Lx_total,
         haz_nervous= sum_nervous/Lx_total,
         haz_other_causes= sum_other_causes/Lx_total,
         haz_respiratory= sum_respiratory/Lx_total,
         haz_symptoms_signs= sum_symptoms_signs/Lx_total
  )

summarise() has grouped output by ‘Year’, ‘sex_rec’. You can override using the .groups argument.

Code
# Add agegroup variable and rename columns for analysis
mx_1x1_comp_filt3b <- 
  mx_1x1_comp_filt3b|>
  mutate(agegroup = dplyr::case_when(
    grepl("18",edad_cant_cat)~ 18,
    grepl("30",edad_cant_cat)~ 30,
    grepl("45",edad_cant_cat)~ 45,
    grepl("60",edad_cant_cat)~ 60,
    TRUE ~ NA_real_  # Opcional: manejo de valores fuera de rango
  ))|> 
  rename("year"="Year", "sex"="sex_rec")


c_SISTRAT_c1_circulatory <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                 status = grepl("Circulatory",chapter), 
                                 birth = birth_date_rec, 
                                 exit = death_date_rec, entry = disch_date_rec6,
                                 #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                 breaks = list(per = seq(2010, 2021, by = 1), 
                                               #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                               age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                 aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_circulatory_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_circulatory, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_circulatory', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_digestive <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                    status = grepl("Digestive",chapter), 
                                    birth = birth_date_rec, 
                                    exit = death_date_rec, entry = disch_date_rec6,
                                    #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                    breaks = list(per = seq(2010, 2021, by = 1), 
                                                  #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                  age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                    aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_digestive_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_digestive, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_digestive', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_endocrine_metabolic <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                  status = grepl("Endocrine",chapter), 
                                  birth = birth_date_rec, 
                                  exit = death_date_rec, entry = disch_date_rec6,
                                  #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                  breaks = list(per = seq(2010, 2021, by = 1), 
                                                #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                  aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_endocrine_metabolic_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_endocrine_metabolic, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_endocrine_metabolic', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_infectious_parasitic <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                      status = grepl("parasitic",chapter), 
                                      birth = birth_date_rec, 
                                      exit = death_date_rec, entry = disch_date_rec6,
                                      #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                      breaks = list(per = seq(2010, 2021, by = 1), 
                                                    #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                    age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                      aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_infectious_parasitic_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_infectious_parasitic, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_infectious_parasitic', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_malignant_neoplasms <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                      status = grepl("neoplasms",chapter), 
                                      birth = birth_date_rec, 
                                      exit = death_date_rec, entry = disch_date_rec6,
                                      #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                      breaks = list(per = seq(2010, 2021, by = 1), 
                                                    #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                    age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                      aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_malignant_neoplasms_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_malignant_neoplasms, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_malignant_neoplasms', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_mental <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                      status = grepl("Mental",chapter), 
                                      birth = birth_date_rec, 
                                      exit = death_date_rec, entry = disch_date_rec6,
                                      #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                      breaks = list(per = seq(2010, 2021, by = 1), 
                                                    #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                    age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                      aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_mental_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_mental, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_mental', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_nervous <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                      status = grepl("Nervous",chapter), 
                                      birth = birth_date_rec, 
                                      exit = death_date_rec, entry = disch_date_rec6,
                                      #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                      breaks = list(per = seq(2010, 2021, by = 1), 
                                                    #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                    age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                      aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_nervous_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_nervous, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_nervous', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_other_causes <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                     status = grepl("Other",chapter), 
                                     birth = birth_date_rec, 
                                     exit = death_date_rec, entry = disch_date_rec6,
                                     #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                     breaks = list(per = seq(2010, 2021, by = 1), 
                                                   #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                   age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                     aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_other_causes_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_other_causes, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_other_causes', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_respiratory <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                    status = grepl("Respiratory",chapter), 
                                    birth = birth_date_rec, 
                                    exit = death_date_rec, entry = disch_date_rec6,
                                    #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                    breaks = list(per = seq(2010, 2021, by = 1), 
                                                  #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                  age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                    aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_respiratory_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_respiratory, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_respiratory', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)
c_SISTRAT_c1_symptoms_signs <- lexpand(clean_df|> left_join(mortality_deduplicated[,c("hashkey", "edad_cant_cat", "category", "chapter")], by=c("hash_key"="hashkey")), 
                                    status = grepl("Symptoms",chapter), 
                                    birth = birth_date_rec, 
                                    exit = death_date_rec, entry = disch_date_rec6,
                                    #2025-06-11= le tuve que dar apertura para que integrarara a los que se retiraban después.
                                    breaks = list(per = seq(2010, 2021, by = 1), 
                                                  #2025-06-11=No filtro por la edad en que fallece la persona, el filtro ya lo hice arriba
                                                  age = c(18, 30, 45, 60, 76)), #, fot = c(0, 1, 3, 5, Inf)
                                    aggre = list(agegroup = age, year = per, sex= sex_rec))
sr_1_symptoms_signs_df <- 
  popEpi::sir( coh.data = c_SISTRAT_c1_symptoms_signs, coh.obs = 'from0to1', coh.pyrs = 'pyrs',
               ref.data = mx_1x1_comp_filt3b, #this should be only for people with assaults
               ref.rate = 'haz_symptoms_signs', 
               adjust = c('agegroup','year','sex'), 
               EAR=T)

sir_circulatory_df <- 
  cbind.data.frame(
    total = "Circulatory System Diseases",
    observed = round(sr_1_circulatory_df$observed, 0),
    pyrs = round(sr_1_circulatory_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_circulatory_df$observed, sr_1_circulatory_df$pyrs, phi = 1))))),
    expected = round(sr_1_circulatory_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_circulatory_df, phi = extract_phi(c_SISTRAT_c1_circulatory))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_circulatory_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_circulatory))

sir_digestive_df <- 
  cbind.data.frame(
    total = "Digestive System Diseases",
    observed = round(sr_1_digestive_df$observed, 0),
    pyrs = round(sr_1_digestive_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_digestive_df$observed, sr_1_digestive_df$pyrs, phi = 1))))),
    expected = round(sr_1_digestive_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_digestive_df, phi = extract_phi(c_SISTRAT_c1_digestive))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_digestive_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_digestive))

sir_endocrine_metabolic_df <- 
  cbind.data.frame(
    total = "Endocrine and Metabolic Diseases",
    observed = round(sr_1_endocrine_metabolic_df$observed, 0),
    pyrs = round(sr_1_endocrine_metabolic_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_endocrine_metabolic_df$observed, sr_1_endocrine_metabolic_df$pyrs, phi = 1))))),
    expected = round(sr_1_endocrine_metabolic_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_endocrine_metabolic_df, phi = extract_phi(c_SISTRAT_c1_endocrine_metabolic))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_endocrine_metabolic_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_endocrine_metabolic))

sir_infectious_parasitic_df <- 
  cbind.data.frame(
    total = "Infectious and Parasitic Diseases",
    observed = round(sr_1_infectious_parasitic_df$observed, 0),
    pyrs = round(sr_1_infectious_parasitic_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_infectious_parasitic_df$observed, sr_1_infectious_parasitic_df$pyrs, phi = 1))))),
    expected = round(sr_1_infectious_parasitic_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_infectious_parasitic_df, phi = extract_phi(c_SISTRAT_c1_infectious_parasitic))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_infectious_parasitic_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_infectious_parasitic))

sir_malignant_neoplasms_df <- 
  cbind.data.frame(
    total = "Malignant neoplasms",
    observed = round(sr_1_malignant_neoplasms_df$observed, 0),
    pyrs = round(sr_1_malignant_neoplasms_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_malignant_neoplasms_df$observed, sr_1_malignant_neoplasms_df$pyrs, phi = 1))))),
    expected = round(sr_1_malignant_neoplasms_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_malignant_neoplasms_df, phi = extract_phi(c_SISTRAT_c1_malignant_neoplasms))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_malignant_neoplasms_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_malignant_neoplasms))

sir_mental_df <- 
  cbind.data.frame(
    total = "Mental and behavioral",
    observed = round(sr_1_mental_df$observed, 0),
    pyrs = round(sr_1_mental_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_mental_df$observed, sr_1_mental_df$pyrs, phi = 1))))),
    expected = round(sr_1_mental_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_mental_df, phi = extract_phi(c_SISTRAT_c1_mental))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_mental_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_mental))

sir_nervous_df <- 
  cbind.data.frame(
    total = "Nervous system",
    observed = round(sr_1_nervous_df$observed, 0),
    pyrs = round(sr_1_nervous_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_nervous_df$observed, sr_1_nervous_df$pyrs, phi = 1))))),
    expected = round(sr_1_nervous_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_nervous_df, phi = extract_phi(c_SISTRAT_c1_nervous))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_nervous_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_nervous))

sir_other_causes_df <- 
  cbind.data.frame(
    total = "Other causes",
    observed = round(sr_1_other_causes_df$observed, 0),
    pyrs = round(sr_1_other_causes_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_other_causes_df$observed, sr_1_other_causes_df$pyrs, phi = 1))))),
    expected = round(sr_1_other_causes_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_other_causes_df, phi = extract_phi(c_SISTRAT_c1_other_causes))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_other_causes_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_other_causes))

sir_respiratory_df <- 
  cbind.data.frame(
    total = "Respiratory System Diseases",
    observed = round(sr_1_respiratory_df$observed, 0),
    pyrs = round(sr_1_respiratory_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_respiratory_df$observed, sr_1_respiratory_df$pyrs, phi = 1))))),
    expected = round(sr_1_respiratory_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_respiratory_df, phi = extract_phi(c_SISTRAT_c1_respiratory))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_respiratory_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_respiratory))

sir_symptoms_signs_df <- 
  cbind.data.frame(
    total = "Symptoms, Signs and Abnormal Findings",
    observed = round(sr_1_symptoms_signs_df$observed, 0),
    pyrs = round(sr_1_symptoms_signs_df$pyrs, 0),
    CMR_1000 = do.call(sprintf, c("%.1f (%.1f–%.1f)", as.list(unlist(cmr_ci_phi(sr_1_symptoms_signs_df$observed, sr_1_symptoms_signs_df$pyrs, phi = 1))))),
    expected = round(sr_1_symptoms_signs_df$expected, 0),
    SMR = do.call(sprintf, c("%.6f (%.6f–%.6f)", as.list(unlist(sir_ci_phi_improved(sr_1_symptoms_signs_df, phi = extract_phi(c_SISTRAT_c1_symptoms_signs))[, 1:3])))),
    EAR = as.character(sprintf("%.2f", sr_1_symptoms_signs_df$EAR)), 
    phi = extract_phi(c_SISTRAT_c1_symptoms_signs))


map <- c(
  "Infectious and Parasitic Diseases"     = "Infectious & parasitic",
  "Malignant neoplasms"                   = "Malignant neoplasms",
  "Endocrine and Metabolic Diseases"      = "Endocrine & metabolic",
  "Mental and behavioral"                 = "Mental and behavioral",
  "Nervous system"                        = "Nervous system",
  "Circulatory System Diseases"           = "Circulatory",
  "Respiratory System Diseases"           = "Respiratory",
  "Digestive System Diseases"             = "Digestive",
  "Symptoms, Signs and Abnormal Findings" = "Symptoms & signs",
  "Other causes"                          = "Other underlying causes"
)



cat("Dispersion-corrected 95% confidence intervals\n")
bind_rows(sir_circulatory_df,  sir_digestive_df, sir_endocrine_metabolic_df,  
          sir_infectious_parasitic_df, sir_malignant_neoplasms_df, sir_mental_df, sir_nervous_df, sir_other_causes_df, sir_respiratory_df, sir_symptoms_signs_df)|> 
  rename("Characteristic"="total")|>
  (\(df) {
    df->> df_smr_ind_non_ext
    df
  })()|> 
  tidyr::extract(
    SMR,
    into   = c("est", "low", "high"),
    regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
    convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.2f (%.2f–%.2f)", est, low, high))|>
  dplyr::select(-est, -low, -high)|>  
  dplyr::mutate(obs_exp= paste0(observed, "/", expected))|>
  dplyr::mutate(key = map[Characteristic],
         ord = match(key, order_levels))|>
  dplyr::arrange(ord)|>
  dplyr::select(-key, -ord)|>
  knitr::kable("markdown", caption="All-cause SMRs for patients who accessed SUD treatment by sex and age group. Non-external causes")
Dispersion-corrected 95% confidence intervals
All-cause SMRs for patients who accessed SUD treatment by sex and age group. Non-external causes
Characteristic observed pyrs CMR_1000 expected EAR phi SMR_dir obs_exp
Infectious and Parasitic Diseases 129 353826 0.4 (0.3–0.4) 43 0.24 0.6533812 2.97 (2.58–3.41) 129/43
Malignant neoplasms 220 353826 0.6 (0.5–0.7) 205 0.04 1.0672357 1.07 (0.94–1.23) 220/205
Endocrine and Metabolic Diseases 42 353826 0.1 (0.1–0.2) 28 0.04 0.5089105 1.52 (1.23–1.89) 42/28
Mental and behavioral 49 353826 0.1 (0.1–0.2) 7 0.12 0.4737777 6.71 (5.53–8.13) 49/7
Nervous system 47 353826 0.1 (0.1–0.2) 27 0.06 0.7932892 1.74 (1.35–2.25) 47/27
Circulatory System Diseases 407 353826 1.2 (1.0–1.3) 170 0.67 0.7945663 2.40 (2.20–2.62) 407/170
Respiratory System Diseases 181 353826 0.5 (0.4–0.6) 45 0.38 0.7598600 4.06 (3.57–4.60) 181/45
Digestive System Diseases 704 353826 2.0 (1.8–2.1) 113 1.67 1.5908436 6.22 (5.67–6.83) 704/113
Symptoms, Signs and Abnormal Findings 107 353826 0.3 (0.3–0.4) 27 0.22 0.6869083 3.89 (3.33–4.56) 107/27
Other causes 53 353826 0.1 (0.1–0.2) 42 0.03 1.1576887 1.25 (0.94–1.67) 53/42
Code
r2_adj_circulatory <-
  rate(
    data    = c_SISTRAT_c1_circulatory,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_circulatory <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_circulatory$rate.adj,           # primer vector (rate)
  r2_adj_circulatory$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_circulatory),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_digestive <-
  rate(
    data    = c_SISTRAT_c1_digestive,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_digestive <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_digestive$rate.adj,           # primer vector (rate)
  r2_adj_digestive$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_digestive),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_endocrine_metabolic <-
  rate(
    data    = c_SISTRAT_c1_endocrine_metabolic,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_endocrine_metabolic <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_endocrine_metabolic$rate.adj,           # primer vector (rate)
  r2_adj_endocrine_metabolic$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_endocrine_metabolic),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_infectious_parasitic <-
  rate(
    data    = c_SISTRAT_c1_infectious_parasitic,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_infectious_parasitic <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_infectious_parasitic$rate.adj,           # primer vector (rate)
  r2_adj_infectious_parasitic$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_infectious_parasitic),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_malignant_neoplasms <-
  rate(
    data    = c_SISTRAT_c1_malignant_neoplasms,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_malignant_neoplasms <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_malignant_neoplasms$rate.adj,           # primer vector (rate)
  r2_adj_malignant_neoplasms$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_malignant_neoplasms),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_mental <-
  rate(
    data    = c_SISTRAT_c1_mental,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_mental <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_mental$rate.adj,           # primer vector (rate)
  r2_adj_mental$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_mental),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_nervous <-
  rate(
    data    = c_SISTRAT_c1_nervous,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_nervous <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_nervous$rate.adj,           # primer vector (rate)
  r2_adj_nervous$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_nervous),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_other_causes <-
  rate(
    data    = c_SISTRAT_c1_other_causes,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_other_causes <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_other_causes$rate.adj,           # primer vector (rate)
  r2_adj_other_causes$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_other_causes),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_respiratory <-
  rate(
    data    = c_SISTRAT_c1_respiratory,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_respiratory <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_respiratory$rate.adj,           # primer vector (rate)
  r2_adj_respiratory$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_respiratory),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

r2_adj_symptoms_signs <-
  rate(
    data    = c_SISTRAT_c1_symptoms_signs,
    obs     = from0to1,
    pyrs    = pyrs,
    #print   = year,
    adjust  = c("year", "sex", "agegroup"),
    weights = weights_df #weights inglm should be applied in the offset
  )

DSR_1k_symptoms_signs <- mapply(
  dsr_format_corr,                 # FUN
  r2_adj_symptoms_signs$rate.adj,           # primer vector (rate)
  r2_adj_symptoms_signs$SE.rate.adj,        # segundo vector (se)
  MoreArgs = list(            # argumentos fijos extra
    phi    = extract_phi_dir(c_SISTRAT_c1_symptoms_signs),
    factor = 1e3,
    digits = 6,
    conf   = 0.95))

tasa_ponderada_diseases <- mx_1x1_comp_filt3b |> 
  ungroup() |> 
  left_join(
    weights_df,
    by = c("year", "agegroup", "sex")) |> 
  summarise(
    haz_circulatory_w = sum(haz_circulatory * weights),
    haz_digestive_w = sum(haz_digestive * weights),
    haz_endocrine_metabolic_w = sum(haz_endocrine_metabolic * weights),
    haz_infectious_parasitic_w = sum(haz_infectious_parasitic * weights),
    haz_malignant_neoplasms_w = sum(haz_malignant_neoplasms * weights),
    haz_mental_w = sum(haz_mental * weights),
    haz_nervous_w = sum(haz_nervous * weights),
    haz_other_causes_w = sum(haz_other_causes * weights),
    haz_respiratory_w = sum(haz_respiratory * weights),
    haz_symptoms_signs_w = sum(haz_symptoms_signs * weights)
  ) *1e3

cbind.data.frame(
rbind.data.frame(
  cbind.data.frame(var="Circulatory System Diseases", t(r2_adj_circulatory[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_circulatory),
  cbind.data.frame(var="Digestive System Diseases", t(r2_adj_digestive[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_digestive),
  cbind.data.frame(var="Endocrine and Metabolic Diseases", t(r2_adj_endocrine_metabolic[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_endocrine_metabolic),
  cbind.data.frame(var="Infectious and Parasitic Diseases", t(r2_adj_infectious_parasitic[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_infectious_parasitic),
  cbind.data.frame(var="Malignant neoplasms", t(r2_adj_malignant_neoplasms[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_malignant_neoplasms),
  cbind.data.frame(var="Mental and behavioral", t(r2_adj_mental[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_mental),
  cbind.data.frame(var="Nervous system", t(r2_adj_nervous[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_nervous),
  cbind.data.frame(var="Other Causes", t(r2_adj_other_causes[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_other_causes),
  cbind.data.frame(var="Respiratory System Diseases", t(r2_adj_respiratory[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_respiratory),
  cbind.data.frame(var="Symptoms, Signs and Abnormal Findings", t(r2_adj_symptoms_signs[, c(rate, rate.lo, rate.hi, rate.adj, rate.adj.lo, rate.adj.hi)]), SMR_dir= DSR_1k_symptoms_signs)
), perc= t(round(tasa_ponderada_diseases,1)))|>
  (\(df) {
    rownames(df) <- NULL
    df->> df_smr_dir_diseases
    df
  })()|> 
  dplyr::mutate(Rate_95ci= sprintf("%.1f (%.1f–%.1f)", `1`*1000, `2`*1000, `3`*1000))|> 
  dplyr::mutate(AdjRate_95ci = sprintf("%.1f (%.1f–%.1f)", `4`*1000, `5`*1000, `6`*1000))|>
  dplyr::select(-any_of(2:7))|>
  tidyr::extract(
    SMR_dir,
    into   = c("est", "low", "high"),
    regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
    convert = TRUE            # convierte a numérico
  )|>
  dplyr::mutate(across(c(est, low, high), \(x) round(x, 2)),
                SMR_dir = sprintf("%.1f (%.1f–%.1f)", est, low, high))|>  
  dplyr::select(-est, -low, -high)|>  
  rename("DSR (SEs robust to dispersion)"="SMR_dir", "DSR"="AdjRate_95ci", "CMR"="Rate_95ci")|>
  dplyr::mutate(key = map[var],
                ord = match(key, order_levels))|>
  dplyr::arrange(ord)|>
  dplyr::select(-key, -ord)|>
  knitr::kable("markdown", caption= "Disease-specific SMRs, direct method, underlying")
Disease-specific SMRs, direct method, underlying
var perc CMR DSR DSR (SEs robust to dispersion)
Infectious and Parasitic Diseases 0.1 0.4 (0.3–0.4) 0.4 (0.3–0.7) 0.4 (0.3–0.6)
Malignant neoplasms 0.9 0.6 (0.5–0.7) 1.2 (0.9–1.6) 1.2 (0.9–1.6)
Endocrine and Metabolic Diseases 0.1 0.1 (0.1–0.2) 0.1 (0.1–0.2) 0.1 (0.1–0.2)
Mental and behavioral 0.0 0.1 (0.1–0.2) 0.1 (0.1–0.2) 0.1 (0.1–0.1)
Nervous system 0.1 0.1 (0.1–0.2) 0.3 (0.1–0.7) 0.3 (0.1–0.6)
Circulatory System Diseases 0.6 1.2 (1.0–1.3) 1.7 (1.2–2.5) 1.8 (1.3–2.4)
Respiratory System Diseases 0.2 0.5 (0.4–0.6) 0.5 (0.4–0.7) 0.5 (0.4–0.7)
Digestive System Diseases 0.4 2.0 (1.8–2.1) 2.9 (2.3–3.7) 2.9 (2.2–3.8)
Symptoms, Signs and Abnormal Findings 0.1 0.3 (0.3–0.4) 0.6 (0.3–1.0) 0.6 (0.3–0.9)
Other Causes 0.2 0.1 (0.1–0.2) 0.2 (0.1–0.2) 0.2 (0.1–0.2)
Code
sep_ind_ext<- 
    tibble::tibble(type= "Indirect, main", raw = df_smr_ind_ext$SMR) %>%
    extract(
        raw,
        into   = c("estimate", "lower", "upper"),
        regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
        convert = TRUE            # convierte a numérico
    )|>
    dplyr::select(type, estimate, lower, upper)

sep_ind_ext <- bind_cols(Characteristic=df_smr_ind_ext$Characteristic, sep_ind_ext)
sep_dir_ext<- 
    tibble(type= "Direct, main", raw = df_smr_dir_ext$SMR_dir) %>%
    extract(
        raw,
        into   = c("estimate", "lower", "upper"),
        regex  = "^\\s*([0-9.]+)\\s*\\(([^–-]+)[–-]([^)]+)\\)\\s*$",
        convert = TRUE            # convierte a numérico
    )|>
    dplyr::select(type, estimate, lower, upper)
sep_dir_ext <- bind_cols(Characteristic=sep_dir_ext$var, sep_ind_ext)

Warning: Unknown or uninitialised column: var.

Code
variances_ind_ext <- ((log(sep_ind_ext$upper[c(1,4)]) - log(sep_ind_ext$estimate[c(1,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe <- rma(yi = log(sep_ind_ext$estimate[c(1,4)]), sei = sqrt(variances_ind_ext), method = "FE")

variances_ind_ext2 <- ((log(sep_ind_ext$upper[c(2,4)]) - log(sep_ind_ext$estimate[c(2,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe2 <- rma(yi = log(sep_ind_ext$estimate[c(2,4)]), sei = sqrt(variances_ind_ext2), method = "FE")

variances_ind_ext3 <- ((log(sep_ind_ext$upper[c(3,4)]) - log(sep_ind_ext$estimate[c(3,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe3 <- rma(yi = log(sep_ind_ext$estimate[c(3,4)]), sei = sqrt(variances_ind_ext3), method = "FE")

variances_ind_ext4 <- ((log(sep_ind_ext$upper[c(5,4)]) - log(sep_ind_ext$estimate[c(5,4)])) / qnorm(0.975))^2
meta_fe_ind_ext_fe4 <- rma(yi = log(sep_ind_ext$estimate[c(5,4)]), sei = sqrt(variances_ind_ext4), method = "FE")



variances_dir_ext <- ((log(sep_dir_ext$upper[c(1,4)]) - log(sep_dir_ext$estimate[c(1,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe <- rma(yi = log(sep_dir_ext$estimate[c(1,4)]), sei = sqrt(variances_dir_ext), method = "FE")

variances_dir_ext2 <- ((log(sep_dir_ext$upper[c(2,4)]) - log(sep_dir_ext$estimate[c(2,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe2 <- rma(yi = log(sep_dir_ext$estimate[c(2,4)]), sei = sqrt(variances_dir_ext2), method = "FE")

variances_dir_ext3 <- ((log(sep_dir_ext$upper[c(3,4)]) - log(sep_dir_ext$estimate[c(3,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe3 <- rma(yi = log(sep_dir_ext$estimate[c(3,4)]), sei = sqrt(variances_dir_ext3), method = "FE")

variances_dir_ext4 <- ((log(sep_dir_ext$upper[c(5,4)]) - log(sep_dir_ext$estimate[c(5,4)])) / qnorm(0.975))^2
meta_fe_dir_ext_fe4 <- rma(yi = log(sep_dir_ext$estimate[c(5,4)]), sei = sqrt(variances_dir_ext4), method = "FE")

bind_rows(
cbind.data.frame(type= "", comp= "Assaults vs. No external causes", Q= meta_fe_ind_ext_fe$QE, p= meta_fe_ind_ext_fe$QEp, Q_b= meta_fe_dir_ext_fe$QE, p_b= meta_fe_dir_ext_fe$QEp),
cbind.data.frame(type= "", comp= "Intentional self-harm vs. No external causes", Q= meta_fe_ind_ext_fe2$QE, p= meta_fe_ind_ext_fe2$QEp, Q_b= meta_fe_dir_ext_fe2$QE, p_b= meta_fe_dir_ext_fe2$QEp),
cbind.data.frame(type= "", comp= "Other unintentional causes of injury vs. No external causes", Q= meta_fe_ind_ext_fe3$QE, p= meta_fe_ind_ext_fe3$QEp, Q_b= meta_fe_dir_ext_fe3$QE, p_b= meta_fe_dir_ext_fe3$QEp),
cbind.data.frame(type= "", comp= "Transport accidents vs. No external causes", Q= meta_fe_ind_ext_fe4$QE, p= meta_fe_ind_ext_fe4$QEp, Q_b= meta_fe_dir_ext_fe4$QE, p_b= meta_fe_dir_ext_fe4$QEp)
)|> 
    mutate(
    Qa_SMR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q, p),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q, p)
    ),
    Qa_DSR = case_when(
      str_detect(comp, "Age") ~ sprintf("Q %.2f (df=3), p=%.3f", Q_b, p_b),
      TRUE                    ~ sprintf("Q %.2f (df=1), p=%.3f", Q_b, p_b)
    )
  ) |> dplyr::select(type, comp, Qa_SMR, Qa_DSR) |> 
  knitr::kable("markdown", caption= "Heterogeneity, external causes")
Heterogeneity, external causes
type comp Qa_SMR Qa_DSR
Assaults vs. No external causes Q 11.56 (df=1), p=0.001 Q 11.56 (df=1), p=0.001
Intentional self-harm vs. No external causes Q 108.12 (df=1), p=0.000 Q 108.12 (df=1), p=0.000
Other unintentional causes of injury vs. No external causes Q 22.35 (df=1), p=0.000 Q 22.35 (df=1), p=0.000
Transport accidents vs. No external causes Q 9.78 (df=1), p=0.002 Q 9.78 (df=1), p=0.002

To close the project, we erase polars objects.

Code
rm(list = ls()[grepl("_pl$", ls())])

Session info

Code
#|echo: true
#|error: true
#|message: true
#|paged.print: true
message(paste0("R library: ", Sys.getenv("R_LIBS_USER")))

R library: G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32

Code
message(paste0("Date: ",withr::with_locale(new = c('LC_TIME' = 'C'), code =Sys.time())))

Date: 2025-08-21 20:50:51.697498

Code
message(paste0("Editor context: ", path))

Editor context: E:/Mi unidad/Alvacast/SISTRAT 2023/cons

Code
cat("quarto version: "); quarto::quarto_version()
quarto version: 
[1] '1.7.29'
Code
sesion_info <- devtools::session_info()

Warning in system2(“quarto”, “-V”, stdout = TRUE, env = paste0(“TMPDIR=”, : el comando ejecutado ‘“quarto” TMPDIR=C:/Users/andre/AppData/Local/Temp/RtmpUdCeX0/filec2083f0f34d4 -V’ tiene el estatus 1

Code
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Package' = 2, 'Version'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('R packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '70%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",
        "}")))
Code
#|echo: true
#|error: true
#|message: true
#|paged.print: true
#|class-output: center-table
#|eval: false

cat("Python version\n")
Python version
Code
reticulate::py_config()
python:         G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe
libpython:      G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python311.dll
pythonhome:     G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311
version:        3.11.5 | packaged by conda-forge | (main, Aug 27 2023, 03:23:48) [MSC v.1936 64 bit (AMD64)]
Architecture:   64bit
numpy:           [NOT FOUND]

NOTE: Python version was forced by RETICULATE_PYTHON
Code
reticulate::py_list_packages() %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Package' = 2, 'Version'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Python packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '70%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",
        "}"))) 

Warning in system2(python, args, stdout = TRUE): el comando ejecutado ‘“G:/My Drive/Alvacast/SISTRAT 2023/.mamba_root/envs/py311/python.exe” -m pip freeze’ tiene el estatus 1

Save

Code
wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}

paste0(getwd(),"/cons")
file.path(paste0(wdpath,"data/20241015_out"))
file.path(paste0(envpath,"data/20241015_out"))

# Save
rdata_path <- file.path(wdpath, "data/20241015_out", paste0("mort_", format(Sys.time(), "%Y_%m_%d"), ".Rdata"))

save.image(rdata_path)
cat("Saved in:",
    rdata_path)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
  password <- Sys.getenv("PASSWORD_SECRET")
} else {
  if (interactive()) {
    utils::savehistory(tempfile())
    Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
    utils::loadhistory()
  }
  Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
save.image(paste0(rdata_path,".enc"))

# Encriptar el archivo en el mismo lugar
httr2::secret_encrypt_file(path = paste0(rdata_path,".enc"), key = "PASSWORD_SECRET")

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Copy renv lock into cons folder\n")

if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
  message("Running on RStudio Server or inside Docker. Folder copy skipped.")

} else {
    
  source_folder <- 
  destination_folder <- paste0(wdpath,"cons/renv")
  
  # Copy the folder recursively
    file.copy(paste0(wdpath,"renv.lock"), paste0(wdpath,"cons/renv.lock"), overwrite = TRUE)
  
  message("Renv lock copy performed.")
}

Renv lock copy performed.

Code
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
time_after_dedup2<-Sys.time()

paste0("Time in markdown: ");time_after_dedup2-time_before_dedup2
[1] "G:/My Drive/Alvacast/SISTRAT 2023/cons/cons"
[1] "G:/My Drive/Alvacast/SISTRAT 2023//data/20241015_out"
[1] "G:/Mi unidad/Alvacast/SISTRAT 2023/data/20241015_out"
Saved in: G:/My Drive/Alvacast/SISTRAT 2023///data/20241015_out/mort_2025_08_21.RdataCopy renv lock into cons folder
[1] "Time in markdown: "
Time difference of 76.19106 days
Back to top